@rebasepro/schema-inference 0.0.1-canary.4d4fb3e
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +114 -0
- package/README.md +1 -0
- package/dist/builders/reference_property_builder.d.ts +3 -0
- package/dist/builders/string_property_builder.d.ts +3 -0
- package/dist/builders/validation_builder.d.ts +3 -0
- package/dist/collection_builder.d.ts +6 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.es.js +577 -0
- package/dist/index.es.js.map +1 -0
- package/dist/index.umd.cjs +581 -0
- package/dist/index.umd.cjs.map +1 -0
- package/dist/strings.d.ts +19 -0
- package/dist/test_schemas/test_schema.d.ts +1 -0
- package/dist/types.d.ts +35 -0
- package/dist/util.d.ts +11 -0
- package/package.json +37 -0
- package/src/builders/reference_property_builder.ts +18 -0
- package/src/builders/string_property_builder.ts +115 -0
- package/src/builders/validation_builder.ts +18 -0
- package/src/collection_builder.ts +404 -0
- package/src/index.ts +3 -0
- package/src/strings.ts +103 -0
- package/src/test_schemas/pop_products.json +948 -0
- package/src/test_schemas/test_schema.ts +32 -0
- package/src/types.ts +43 -0
- package/src/util.ts +116 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { ValuesCountEntry } from "./types";
|
|
2
|
+
/**
|
|
3
|
+
* Parse a reference string value which can be in the format:
|
|
4
|
+
* - Simple: "path/entityId"
|
|
5
|
+
* - With database: "database_name:::path/entityId"
|
|
6
|
+
* Returns the path and database (undefined if not specified or if "(default)")
|
|
7
|
+
*/
|
|
8
|
+
export declare function parseReferenceString(value: string): {
|
|
9
|
+
path: string;
|
|
10
|
+
database?: string;
|
|
11
|
+
} | null;
|
|
12
|
+
/**
|
|
13
|
+
* Check if a string value looks like a reference
|
|
14
|
+
*/
|
|
15
|
+
export declare function looksLikeReference(value: any): boolean;
|
|
16
|
+
export declare function findCommonInitialStringInPath(valuesCount?: ValuesCountEntry): string | undefined;
|
|
17
|
+
export declare function removeInitialAndTrailingSlashes(s: string): string;
|
|
18
|
+
export declare function removeInitialSlash(s: string): string;
|
|
19
|
+
export declare function removeTrailingSlash(s: string): string;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { DataType } from "@rebasepro/types";
|
|
2
|
+
/**
 * Occurrence counters for a single property, keyed by data type.
 *
 * Scalar types hold a plain occurrence count. `map` holds the nested
 * counters of every child key, and `array` holds the counters of the
 * element types.
 */
export type TypesCount = {
    /** Occurrences counted as numbers. */
    number?: number;
    /** Occurrences counted as strings. */
    string?: number;
    /** Occurrences counted as booleans. */
    boolean?: number;
    /** Nested counters, one entry per child key. */
    map?: TypesCountRecord;
    /** Counters for the types found inside arrays. */
    array?: TypesCount;
    /** Occurrences counted as dates. */
    date?: number;
    /** Occurrences counted as geopoints. */
    geopoint?: number;
    /** Occurrences counted as references. */
    reference?: number;
    /** Occurrences counted as relations. */
    relation?: number;
};
|
|
13
|
+
/**
 * Type counters keyed by property name: every key maps to the
 * {@link TypesCount} collected for that property.
 */
export type TypesCountRecord<K extends keyof DataType = any> = Record<K, TypesCount>;
|
|
16
|
+
/**
 * Value statistics collected for a single property.
 */
export type ValuesCountEntry = {
    /** Every sampled value, in the order it was collected (duplicates included). */
    values: any[];
    /** Number of occurrences of each distinct value. */
    valuesCount: Map<any, number>;
    /** Statistics of the child keys, for properties holding maps. */
    mapValues?: ValuesCountRecord;
};
|
|
21
|
+
/**
 * Value statistics keyed by property name.
 */
export type ValuesCountRecord = Record<string, ValuesCountEntry>;
|
|
22
|
+
/**
 * Input handed to the property builders when inferring a property.
 */
export type InferencePropertyBuilderProps = {
    /** Name of the property. */
    name: string;
    /** Total number of documents these props were built from. */
    totalDocsCount: number;
    /** How many times each value shows up. */
    valuesResult?: ValuesCountEntry;
};
|
package/dist/util.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { EnumValueConfig, EnumValues } from "@rebasepro/types";
|
|
2
|
+
export declare function extractEnumFromValues(values: unknown[]): {
|
|
3
|
+
id: string;
|
|
4
|
+
label: string;
|
|
5
|
+
}[];
|
|
6
|
+
export declare function prettifyIdentifier(input: string): string;
|
|
7
|
+
export declare function unslugify(slug?: string): string;
|
|
8
|
+
export declare function resolveEnumValues(input: EnumValues): EnumValueConfig[] | undefined;
|
|
9
|
+
export declare function mergeDeep<T extends Record<any, any>, U extends Record<any, any>>(target: T, source: U, ignoreUndefined?: boolean): T & U;
|
|
10
|
+
export declare function isObject(item: any): any;
|
|
11
|
+
export declare function isPlainObject(obj: any): boolean;
|
package/package.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@rebasepro/schema-inference",
|
|
3
|
+
"version": "0.0.1-canary.4d4fb3e",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"publishConfig": {
|
|
6
|
+
"access": "public"
|
|
7
|
+
},
|
|
8
|
+
"main": "./dist/index.umd.cjs",
|
|
9
|
+
"module": "./dist/index.es.js",
|
|
10
|
+
"types": "dist/index.d.ts",
|
|
11
|
+
"source": "src/index.ts",
|
|
12
|
+
"devDependencies": {
|
|
13
|
+
"@rebasepro/types": "0.0.1-canary.4d4fb3e",
|
|
14
|
+
"@types/node": "^20.17.14",
|
|
15
|
+
"typescript": "^5.9.3",
|
|
16
|
+
"vite": "^7.2.4"
|
|
17
|
+
},
|
|
18
|
+
"exports": {
|
|
19
|
+
".": {
|
|
20
|
+
"types": "./dist/index.d.ts",
|
|
21
|
+
"development": "./src/index.ts",
|
|
22
|
+
"import": "./dist/index.es.js",
|
|
23
|
+
"require": "./dist/index.umd.cjs"
|
|
24
|
+
},
|
|
25
|
+
"./package.json": "./package.json"
|
|
26
|
+
},
|
|
27
|
+
"files": [
|
|
28
|
+
"dist",
|
|
29
|
+
"src"
|
|
30
|
+
],
|
|
31
|
+
"scripts": {
|
|
32
|
+
"dev": "vite",
|
|
33
|
+
"build": "vite build && tsc --emitDeclarationOnly -p tsconfig.prod.json",
|
|
34
|
+
"clean": "rm -rf dist && find ./src -name '*.js' -type f | xargs rm -f"
|
|
35
|
+
},
|
|
36
|
+
"gitHead": "71bcef3c51a458cd054f7924cc18efbbe515dcc8"
|
|
37
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { findCommonInitialStringInPath } from "../strings";
|
|
2
|
+
import { InferencePropertyBuilderProps } from "../types";
|
|
3
|
+
import { Property } from "@rebasepro/types";
|
|
4
|
+
|
|
5
|
+
/**
 * Builds a reference property for the given key.
 *
 * The target path is whatever `findCommonInitialStringInPath` extracts from
 * the sampled values; when it yields nothing, the placeholder
 * "!!!FIX_ME!!!" is used so the missing path is easy to spot.
 *
 * @param props name of the property plus the sampled value statistics
 * @returns the inferred reference property
 */
export function buildReferenceProperty(props: InferencePropertyBuilderProps): Property {
    const { name, valuesResult } = props;
    const commonPath = findCommonInitialStringInPath(valuesResult);

    return {
        name: name ?? "",
        type: "reference",
        path: commonPath ?? "!!!FIX_ME!!!"
    };
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { findCommonInitialStringInPath } from "../strings";
|
|
2
|
+
import { extractEnumFromValues } from "../util";
|
|
3
|
+
import { FileType, Property, StringProperty } from "@rebasepro/types";
|
|
4
|
+
import { InferencePropertyBuilderProps, ValuesCountEntry } from "../types";
|
|
5
|
+
|
|
6
|
+
// File suffixes used to recognise values that point to image files.
const IMAGE_EXTENSIONS = [
    ".jpg",
    ".jpeg",
    ".png",
    ".webp",
    ".gif",
    ".avif"
];

// File suffixes used to recognise values that point to audio files.
const AUDIO_EXTENSIONS = [
    ".mp3",
    ".ogg",
    ".opus",
    ".aac"
];

// File suffixes used to recognise values that point to video files.
const VIDEO_EXTENSIONS = [
    ".avi",
    ".mp4"
];

// Pattern used to decide whether a sampled string is an email address.
const emailRegEx = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;
|
|
11
|
+
|
|
12
|
+
/**
 * Infers a string property from the values sampled for one key.
 *
 * Each heuristic fires only when strictly more than two thirds of
 * `totalDocsCount` sampled string values match it:
 * - values starting with "http" set `url`
 * - values matching `emailRegEx` set `email`
 * - 28-character values without spaces set `readOnly`
 *
 * When none of those fire, an `enum` is attempted if the number of distinct
 * values is below a third of the number of sampled entries. If no enum was
 * set either, a `storage` config is attempted based on the file extensions
 * of the values.
 *
 * @returns the plain string property when nothing could be inferred (or no
 * values were sampled), otherwise the property extended with the inferred config
 */
export function buildStringProperty({
    name,
    totalDocsCount,
    valuesResult
}: InferencePropertyBuilderProps): Property {

    const baseProperty: Property = {
        name: name ?? "",
        type: "string"
    };

    // Without sampled values there is nothing to infer from.
    if (!valuesResult) {
        return baseProperty;
    }

    const samples = valuesResult.values;
    const entriesCount = samples.length;
    const distinctCount = valuesResult.valuesCount.size;

    // Strict majority threshold shared by every heuristic below.
    const majority = totalDocsCount / 3 * 2;
    const countMatching = (test: (value: string) => boolean): number =>
        samples.filter((value) => typeof value === "string" && test(value)).length;

    const config: Partial<StringProperty> = {};

    const looksLikeUrl = countMatching((value) => value.startsWith("http")) > majority;
    if (looksLikeUrl) {
        config.url = true;
    }

    const looksLikeEmail = countMatching((value) => emailRegEx.test(value)) > majority;
    if (looksLikeEmail) {
        config.email = true;
    }

    const looksLikeUserId = countMatching((value) => value.length === 28 && !value.includes(" ")) > majority;
    if (looksLikeUserId) {
        config.readOnly = true;
    }

    // True when none of the specialised heuristics matched.
    const isPlainString = !(looksLikeEmail || looksLikeUrl || looksLikeUserId);

    // Few distinct values compared to the number of entries: probably an enum.
    if (isPlainString && distinctCount < entriesCount / 3) {
        const enumValues = extractEnumFromValues(Array.from(valuesResult.valuesCount.keys()));
        if (Object.keys(enumValues).length > 1) {
            config.enum = enumValues;
        }
    }

    // Regular string: check whether the values look like stored media files.
    if (isPlainString && !config.enum) {
        const fileType = probableFileType(valuesResult, totalDocsCount);
        if (fileType) {
            config.storage = {
                acceptedFiles: fileType as FileType[],
                storagePath: findCommonInitialStringInPath(valuesResult) ?? "/"
            };
        }
    }

    if (Object.keys(config).length === 0) {
        return baseProperty;
    }

    return {
        ...baseProperty,
        ...config
    } as StringProperty;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Guesses which kinds of media files the sampled string values point to.
 *
 * Every string value is counted at most once: as image, else audio, else
 * video, based on the suffix lists declared in this file.
 *
 * @param valuesCount sampled values of the property
 * @param totalDocsCount number of documents the sample was taken from
 * @returns the accepted file types when more than two thirds of
 * `totalDocsCount` values are media files, otherwise `false`
 */
function probableFileType(valuesCount: ValuesCountEntry, totalDocsCount: number): false | FileType[] {
    const endsWithAny = (value: string, extensions: string[]): boolean =>
        extensions.some((extension) => value.toString().endsWith(extension));

    const tally = { image: 0, audio: 0, video: 0 };
    valuesCount.values.forEach((candidate) => {
        if (typeof candidate !== "string") return;
        if (endsWithAny(candidate, IMAGE_EXTENSIONS)) tally.image += 1;
        else if (endsWithAny(candidate, AUDIO_EXTENSIONS)) tally.audio += 1;
        else if (endsWithAny(candidate, VIDEO_EXTENSIONS)) tally.video += 1;
    });

    const mediaTotal = tally.image + tally.audio + tally.video;
    const isMostlyMedia = mediaTotal > (totalDocsCount * 2) / 3;
    if (!isMostlyMedia) {
        return false;
    }

    const detected: FileType[] = [];
    if (tally.image > 0) detected.push("image/*");
    if (tally.audio > 0) detected.push("audio/*");
    if (tally.video > 0) detected.push("video/*");

    return detected.length > 0 ? detected : false;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { PropertyValidationSchema } from "@rebasepro/types";
|
|
2
|
+
import { InferencePropertyBuilderProps } from "../types";
|
|
3
|
+
|
|
4
|
+
/**
 * Infers the validation schema of a property.
 *
 * A property is marked as required when the number of sampled values
 * equals the total number of documents.
 *
 * @returns `{ required: true }` in that case, `undefined` otherwise (also
 * when no values were sampled)
 */
export function buildValidation({
    totalDocsCount,
    valuesResult
}: InferencePropertyBuilderProps): PropertyValidationSchema | undefined {
    const presentInEveryDoc = valuesResult
        ? valuesResult.values.length === totalDocsCount
        : false;

    return presentInEveryDoc ? { required: true } : undefined;
}
|
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
import {
|
|
2
|
+
InferencePropertyBuilderProps,
|
|
3
|
+
TypesCount,
|
|
4
|
+
TypesCountRecord,
|
|
5
|
+
ValuesCountEntry,
|
|
6
|
+
ValuesCountRecord
|
|
7
|
+
} from "./types";
|
|
8
|
+
import { buildStringProperty } from "./builders/string_property_builder";
|
|
9
|
+
import { buildValidation } from "./builders/validation_builder";
|
|
10
|
+
import { buildReferenceProperty } from "./builders/reference_property_builder";
|
|
11
|
+
import { extractEnumFromValues, mergeDeep, prettifyIdentifier, resolveEnumValues } from "./util";
|
|
12
|
+
import { DataType, EnumValues, Properties, Property, StringProperty } from "@rebasepro/types";
|
|
13
|
+
|
|
14
|
+
/**
 * Callback that maps a raw value to the data type it should be counted as.
 */
export type InferenceTypeBuilder = (value: any) => DataType;
|
|
15
|
+
|
|
16
|
+
/**
 * Infers the properties of an entity from a sample of documents.
 *
 * Every top-level key of every document contributes to the type and value
 * statistics, except keys starting with "_", which are ignored. The
 * properties are then built from those statistics.
 *
 * @param data sampled documents; falsy entries are skipped. A missing
 * sample (`null`/`undefined`) is treated as an empty one instead of throwing.
 * @param getType callback deciding the data type of each value
 * @returns the inferred properties, keyed by document key
 */
export async function buildEntityPropertiesFromData(
    data: object[],
    getType: InferenceTypeBuilder
): Promise<Properties> {
    const typesCount: TypesCountRecord = {};
    const valuesCount: ValuesCountRecord = {};

    // The sample is used both for iteration and for the document total, so
    // normalise it once instead of guarding only the loop.
    const entries = data ?? [];

    for (const entry of entries) {
        if (!entry) continue;
        for (const [key, value] of Object.entries(entry)) {
            if (key.startsWith("_")) continue; // Ignore properties starting with _
            increaseMapTypeCount(typesCount, key, value, getType);
            increaseValuesCount(valuesCount, key, value, getType);
        }
    }

    return buildPropertiesFromCount(entries.length, typesCount, valuesCount);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Refines an existing property using a sample of its values.
 *
 * - When the property already declares an enum, the enum entries extracted
 *   from the sample are placed in front of the declared ones. Extracted
 *   entries whose id is already declared are dropped, so the result never
 *   contains the same id twice.
 * - Otherwise a property is inferred from the sample (using the declared
 *   type of `property`) and the given property is deep-merged on top of it.
 *
 * @param data sampled values of the property; a missing or empty sample is
 * handled without throwing
 * @param property the property to refine
 * @param getType callback deciding the data type of each value
 * @returns the refined property
 */
export function buildPropertyFromData(
    data: any[],
    property: Property,
    getType: InferenceTypeBuilder
): Property {
    const entries = data ?? [];
    const typesCount: TypesCount = {};
    const valuesCount: ValuesCountRecord = {};

    entries.forEach((entry) => {
        increaseTypeCount(property.type, typesCount, entry, getType);
        increaseValuesCount(valuesCount, "inferred_prop", entry, getType);
    });

    // Undefined when the sample was empty: nothing has been recorded yet.
    const inferredValues: ValuesCountEntry | undefined = valuesCount["inferred_prop"];

    const enumValues = "enum" in property ? resolveEnumValues(property["enum"] as EnumValues) : undefined;
    if (enumValues) {
        const declaredIds = new Set(enumValues.map((entry) => String(entry.id)));
        const sampledKeys = inferredValues ? Array.from(inferredValues.valuesCount.keys()) : [];
        const newEnumValues = extractEnumFromValues(sampledKeys)
            .filter((entry) => !declaredIds.has(String(entry.id)));
        return {
            ...property,
            enum: [...newEnumValues, ...enumValues]
        } as StringProperty;
    }

    const generatedProperty = buildPropertyFromCount(
        "inferred_prop",
        entries.length,
        property.type,
        typesCount,
        inferredValues
    );
    return mergeDeep(generatedProperty, property);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Computes the display order of the property keys.
 *
 * Keys are ranked (case-insensitively) as follows, highest first:
 * 1. keys listed in `priorityKeys`
 * 2. keys that are exactly "title" or "name"
 * 3. keys containing "title" or "name"
 * 4. keys containing "image" or "picture"
 * 5. everything else
 *
 * Keys with the same rank are ordered alphabetically.
 *
 * @param properties properties whose keys are ordered when no explicit
 * `propertiesOrder` is given
 * @param propertiesOrder optional list of keys to order instead; it is
 * never modified
 * @param priorityKeys optional keys that must come first
 * @returns a new array with the ordered keys
 */
export function buildPropertiesOrder(
    properties: Properties,
    propertiesOrder?: string[],
    priorityKeys?: string[]
): string[] {
    const lowerCasePriorityKeys = new Set((priorityKeys ?? []).map((key) => key.toLowerCase()));

    function propOrder(s: string): number {
        const k = s.toLowerCase();
        if (lowerCasePriorityKeys.has(k)) return 4;
        if (k === "title" || k === "name") return 3;
        if (k.includes("title") || k.includes("name")) return 2;
        if (k.includes("image") || k.includes("picture")) return 1;
        return 0;
    }

    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the array owned by the caller.
    const keys = [...(propertiesOrder ?? Object.keys(properties))];
    keys.sort((a, b) => {
        const byRank = propOrder(b) - propOrder(a);
        if (byRank !== 0) return byRank;
        // Same rank: alphabetical, matching the default sort order.
        return a < b ? -1 : a > b ? 1 : 0;
    });
    return keys;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Records one occurrence of `fieldValue` in the given type counters.
 *
 * - Scalar types simply increment their counter.
 * - Maps recurse into every child key (nothing happens for a falsy value).
 * - Arrays always get an `array` counter; for non-empty arrays the most
 *   probable element type is determined, and either the child keys of the
 *   object elements are counted (element type "map") or the counter of
 *   that element type is incremented.
 *
 * @param type data type the value is counted as
 * @param typesCount counters to update (mutated in place)
 * @param fieldValue the value being counted
 * @param getType callback deciding the data type of nested values
 */
function increaseTypeCount(
    type: DataType,
    typesCount: TypesCount,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {
    if (type === "map") {
        if (!fieldValue) return;

        let nestedRecord = typesCount[type];
        if (!nestedRecord) {
            nestedRecord = {};
            typesCount[type] = nestedRecord;
        }
        for (const [key, value] of Object.entries(fieldValue)) {
            increaseMapTypeCount(nestedRecord as TypesCountRecord, key, value, getType);
        }
        return;
    }

    if (type !== "array") {
        // Scalar type: plain occurrence counter.
        typesCount[type] = typesCount[type] ? Number(typesCount[type]) + 1 : 1;
        return;
    }

    let elementCounts = typesCount[type];
    if (!elementCounts) {
        elementCounts = {};
        typesCount[type] = elementCounts;
    }

    const hasElements = fieldValue && Array.isArray(fieldValue) && fieldValue.length > 0;
    if (!hasElements) return;

    const elementType = getMostProbableTypeInArray(fieldValue, getType);
    if (elementType === "map") {
        const nestedRecord = elementCounts[elementType] || {};
        for (const element of fieldValue) {
            // Only plain object elements have child keys worth counting.
            const isObjectElement = element && typeof element === "object" && !Array.isArray(element);
            if (!isObjectElement) continue;
            for (const [key, v] of Object.entries(element)) {
                increaseMapTypeCount(nestedRecord, key, v, getType);
            }
        }
        elementCounts[elementType] = nestedRecord;
    } else {
        elementCounts[elementType] = elementCounts[elementType]
            ? Number(elementCounts[elementType]) + 1
            : 1;
    }
}
|
|
145
|
+
|
|
146
|
+
/**
 * Records one occurrence of `fieldValue` under `key` in a record of type
 * counters.
 *
 * Keys starting with "_" are ignored. The counters entry for the key is
 * created on first sight, even when the value is `null` or `undefined`;
 * such values are not counted any further.
 *
 * @param typesCountRecord record to update (mutated in place)
 * @param key property key the value belongs to
 * @param fieldValue the value being counted
 * @param getType callback deciding the data type of the value
 */
function increaseMapTypeCount(
    typesCountRecord: TypesCountRecord,
    key: string,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {
    const isIgnoredKey = key.startsWith("_");
    if (isIgnoredKey) return;

    const existing: TypesCount = typesCountRecord[key];
    const typesCount: TypesCount = existing ? existing : (typesCountRecord[key] = {});

    // Missing values register the key but do not count towards any type.
    if (fieldValue === null || fieldValue === undefined) return;

    increaseTypeCount(getType(fieldValue), typesCount, fieldValue, getType);
}
|
|
166
|
+
|
|
167
|
+
/**
 * Records `fieldValue` in the value statistics kept for `key`.
 *
 * Keys starting with "_" are ignored. Depending on the data type reported
 * by `getType`:
 * - maps recurse into every child key, storing the nested statistics under
 *   `mapValues` (the field `buildPropertyFromCount` reads when it builds
 *   the child properties)
 * - arrays record every element individually
 * - any other non-null value is recorded as is
 *
 * @param typeValuesRecord record to update (mutated in place)
 * @param key property key the value belongs to
 * @param fieldValue the value being recorded
 * @param getType callback deciding the data type of the value
 */
function increaseValuesCount(
    typeValuesRecord: ValuesCountRecord,
    key: string,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {
    if (key.startsWith("_")) return; // Ignore properties starting with _

    const type = getType(fieldValue);

    let valuesRecord: ValuesCountEntry = typeValuesRecord[key];
    if (!valuesRecord) {
        valuesRecord = {
            values: [],
            valuesCount: new Map()
        };
        typeValuesRecord[key] = valuesRecord;
    }

    const record = (value: any): void => {
        valuesRecord.values.push(value);
        valuesRecord.valuesCount.set(value, (valuesRecord.valuesCount.get(value) ?? 0) + 1);
    };

    if (type === "map") {
        let mapValuesRecord: ValuesCountRecord | undefined = valuesRecord.mapValues;
        if (!mapValuesRecord) {
            mapValuesRecord = {};
            valuesRecord.mapValues = mapValuesRecord;
        }
        if (fieldValue) {
            const nestedRecord = mapValuesRecord;
            Object.entries(fieldValue).forEach(([subKey, value]) =>
                increaseValuesCount(nestedRecord, subKey, value, getType)
            );
        }
    } else if (type === "array") {
        if (Array.isArray(fieldValue)) {
            fieldValue.forEach((value) => record(value));
        }
    } else if (fieldValue !== null && fieldValue !== undefined) {
        record(fieldValue);
    }
}
|
|
215
|
+
|
|
216
|
+
/**
 * Returns the highest occurrence count found in the given type counters.
 *
 * Nested `map` and `array` counters contribute their own highest count.
 *
 * @returns the highest count, or 0 when there are no counters
 */
function getHighestTypesCount(typesCount: TypesCount): number {
    return Object.entries(typesCount).reduce((highest, [type, count]) => {
        let candidate: number;
        switch (type) {
            case "map":
                candidate = getHighestRecordCount(count as TypesCountRecord);
                break;
            case "array":
                candidate = getHighestTypesCount(count as TypesCount);
                break;
            default:
                candidate = Number(count);
        }
        return candidate > highest ? candidate : highest;
    }, 0);
}
|
|
234
|
+
|
|
235
|
+
/**
 * Returns the highest occurrence count found across all keys of a record.
 *
 * @returns the highest count, or 0 for an empty record
 */
function getHighestRecordCount(record: TypesCountRecord): number {
    let highest = 0;
    for (const typesCount of Object.values(record)) {
        highest = Math.max(highest, getHighestTypesCount(typesCount));
    }
    return highest;
}
|
|
240
|
+
|
|
241
|
+
/**
 * Picks the data type with the highest occurrence count.
 *
 * `map` and `array` entries are weighted by the highest count found inside
 * them. On a tie the type encountered first wins.
 *
 * @returns the most frequent type, or "string" when there are no counters
 */
function getMostProbableType(typesCount: TypesCount): DataType {
    let best: { type: DataType; weight: number } = { type: "string", weight: -1 };

    for (const [type, count] of Object.entries(typesCount)) {
        const weight = type === "map"
            ? getHighestRecordCount(count as TypesCountRecord)
            : type === "array"
                ? getHighestTypesCount(count as TypesCount)
                : Number(count);

        if (weight > best.weight) {
            best = { type: type as DataType, weight };
        }
    }

    return best.type;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Builds a single property from the statistics collected for one key.
 *
 * - Maps whose child keys vary a lot between documents become key/value
 *   maps (`keyValue: true`, no child properties). Other maps get one child
 *   property per key.
 * - Arrays get an `of` property built from the most probable element type.
 * - Strings and references are delegated to their dedicated builders; any
 *   other type yields a bare property of that type. These scalar
 *   properties also receive the inferred validation, if any.
 *
 * The property name is the prettified key, falling back to the raw key.
 *
 * @param key key of the property in the sampled documents
 * @param totalDocsCount number of sampled documents
 * @param mostProbableType data type chosen for the property
 * @param typesCount type counters collected for the key
 * @param valuesResult value statistics collected for the key, if any
 * @returns the inferred property
 */
function buildPropertyFromCount(
    key: string,
    totalDocsCount: number,
    mostProbableType: DataType,
    typesCount: TypesCount,
    valuesResult?: ValuesCountEntry
): Property {
    const title: string | undefined = key ? prettifyIdentifier(key) : undefined;
    const displayName = title ?? key ?? "";

    if (mostProbableType === "map") {
        // Highly variable keys mean the map is used as a dictionary: listing
        // every key as a property would be meaningless.
        if (checkTypesCountHighVariability(typesCount)) {
            return {
                type: "map",
                name: displayName,
                keyValue: true,
                properties: {}
            };
        }
        const properties = buildPropertiesFromCount(
            totalDocsCount,
            // No nested counters exist when no map value was ever sampled.
            typesCount.map ?? {},
            valuesResult ? valuesResult.mapValues : undefined
        );
        return {
            type: "map",
            name: displayName,
            properties
        };
    }

    if (mostProbableType === "array") {
        // No element counters exist when no array value was ever sampled.
        const arrayTypesCount: TypesCount = typesCount.array ?? {};
        const arrayMostProbableType = getMostProbableType(arrayTypesCount);
        const of = buildPropertyFromCount(
            key,
            totalDocsCount,
            arrayMostProbableType,
            arrayTypesCount,
            valuesResult
        );
        return {
            type: "array",
            name: displayName,
            of
        };
    }

    const propertyProps: InferencePropertyBuilderProps = {
        name: key,
        totalDocsCount,
        valuesResult
    };

    let result: Property;
    if (mostProbableType === "string") {
        result = buildStringProperty(propertyProps);
    } else if (mostProbableType === "reference") {
        result = buildReferenceProperty(propertyProps);
    } else {
        result = {
            type: mostProbableType
        } as Property;
    }

    if (title) {
        result.name = title;
    }

    const validation = buildValidation(propertyProps);
    if (validation) {
        result.validation = validation;
    }

    return result;
}
|
|
340
|
+
|
|
341
|
+
/**
 * Builds one property per key of the given type counters record.
 *
 * @param totalDocsCount number of sampled documents
 * @param typesCountRecord type counters, keyed by property key
 * @param valuesCountRecord value statistics, keyed by property key
 * @returns the inferred properties, keyed by property key
 */
function buildPropertiesFromCount(
    totalDocsCount: number,
    typesCountRecord: TypesCountRecord,
    valuesCountRecord?: ValuesCountRecord
): Properties {
    const properties: Properties = {};

    for (const [key, typesCount] of Object.entries(typesCountRecord)) {
        const valuesResult = valuesCountRecord ? valuesCountRecord[key] : undefined;
        properties[key] = buildPropertyFromCount(
            key,
            totalDocsCount,
            getMostProbableType(typesCount),
            typesCount,
            valuesResult
        );
    }

    return properties;
}
|
|
359
|
+
|
|
360
|
+
/**
 * Returns the highest count found anywhere in the given counters,
 * descending into every nested object.
 *
 * @returns the highest count, or 0 when there are no counters
 */
function countMaxDocumentsUnder(typesCount: TypesCount) {
    let highest = 0;
    for (const value of Object.values(typesCount)) {
        const candidate = typeof value === "object"
            ? countMaxDocumentsUnder(value as TypesCountRecord)
            : Number(value);
        highest = Math.max(highest, candidate);
    }
    return highest;
}
|
|
371
|
+
|
|
372
|
+
/**
 * Determines the most frequent data type among the elements of an array.
 *
 * @param array the elements to inspect
 * @param getType callback deciding the data type of each element
 * @returns the most probable element type
 */
function getMostProbableTypeInArray(
    array: any[],
    getType: InferenceTypeBuilder
): DataType {
    const elementCounts = array.reduce<TypesCount>((counts, element) => {
        increaseTypeCount(getType(element), counts, element, getType);
        return counts;
    }, {});
    return getMostProbableType(elementCounts);
}
|
|
382
|
+
|
|
383
|
+
/**
 * Tells whether the child keys of a map vary a lot between documents.
 *
 * A child key is considered rare when its highest count is below a third
 * of the highest count found in the whole counters. The map is highly
 * variable when more than half of its child keys are rare.
 *
 * @returns `true` for highly variable maps; `false` otherwise, including
 * when the map has no child keys
 */
function checkTypesCountHighVariability(typesCount: TypesCount) {
    const overallMax = countMaxDocumentsUnder(typesCount);
    const childCounters = Object.values(typesCount.map ?? {});

    const rareKeys = childCounters
        .filter((child) => countMaxDocumentsUnder(child) < overallMax / 3)
        .length;

    return rareKeys / childCounters.length > 0.5;
}
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
/**
 * Infers the data type of a raw JavaScript value.
 *
 * - `null` / `undefined` are reported as "string"
 * - arrays are reported as "array"
 * - `Date` instances are reported as "date" (they would otherwise be
 *   mistaken for maps, since `typeof` reports them as "object")
 * - any other object is reported as "map"
 * - strings, numbers and booleans are reported as such
 * - anything else falls back to "string"
 *
 * @param value the value to inspect
 * @returns the inferred data type
 */
export function inferTypeFromValue(value: any): DataType {
    if (value === null || value === undefined) return "string";
    if (Array.isArray(value)) return "array";
    if (value instanceof Date) return "date";

    switch (typeof value) {
        case "string":
            return "string";
        case "number":
            return "number";
        case "boolean":
            return "boolean";
        case "object":
            return "map";
        default:
            return "string";
    }
}
|
package/src/index.ts
ADDED