@firecms/schema_inference 3.0.0-alpha.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +1 -0
- package/package.json +27 -0
- package/src/builders/reference_property_builder.ts +16 -0
- package/src/builders/string_property_builder.ts +107 -0
- package/src/builders/validation_builder.ts +18 -0
- package/src/collection_builder.ts +324 -0
- package/src/index.ts +2 -0
- package/src/strings.ts +47 -0
- package/src/test_schemas/test_schema.ts +28 -0
- package/src/test_schemas/usage.json +36341 -0
- package/src/types.ts +42 -0
- package/src/util.ts +8 -0
- package/tsconfig.json +53 -0
- package/vite.config.ts +32 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 FireCMS
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# schema_inference
|
package/package.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "3.0.0-alpha.4",
|
|
3
|
+
"name": "@firecms/schema_inference",
|
|
4
|
+
"access": "public",
|
|
5
|
+
"packageManager": "yarn@3.2.3",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"main": "./dist/index.umd.js",
|
|
8
|
+
"module": "./dist/index.es.js",
|
|
9
|
+
"types": "dist/index.d.ts",
|
|
10
|
+
"source": "src/index.ts",
|
|
11
|
+
"exports": {
|
|
12
|
+
".": {
|
|
13
|
+
"import": "./dist/index.es.js",
|
|
14
|
+
"require": "./dist/index.umd.js"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
"dependencies": {
|
|
18
|
+
"@types/node": "^20.5.9",
|
|
19
|
+
"firecms": "^3.0.0-alpha.4",
|
|
20
|
+
"typescript": "^5.2.2"
|
|
21
|
+
},
|
|
22
|
+
"scripts": {
|
|
23
|
+
"dev": "vite",
|
|
24
|
+
"build": "vite build && tsc --emitDeclarationOnly"
|
|
25
|
+
},
|
|
26
|
+
"gitHead": "17bba2feea1f6c818c0d9d4b3d6c8e4dfd4e5b4b"
|
|
27
|
+
}
|
package/src/builders/reference_property_builder.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { InferencePropertyBuilderProps } from "../types";
|
|
2
|
+
import { findCommonInitialStringInPath } from "../strings";
|
|
3
|
+
import { Property } from "firecms";
|
|
4
|
+
|
|
5
|
+
/**
 * Builds a reference property for a field whose sampled values point to
 * other documents.
 *
 * The target path is whatever `findCommonInitialStringInPath` extracts from
 * the sampled values. When no common path can be determined, the
 * "!!!FIX_ME!!!" placeholder is used so the generated schema makes the
 * missing path obvious.
 *
 * @param totalDocsCount number of documents analysed (not used here)
 * @param valuesResult values sampled for the field, if any
 * @returns a property with `dataType: "reference"`
 */
export function buildReferenceProperty({
    totalDocsCount,
    valuesResult
}: InferencePropertyBuilderProps): Property {
    const inferredPath = findCommonInitialStringInPath(valuesResult);
    return {
        dataType: "reference",
        path: inferredPath ?? "!!!FIX_ME!!!"
    };
}
|
package/src/builders/string_property_builder.ts
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { FileType, Property, StringProperty, unslugify } from "firecms";
|
|
2
|
+
import { InferencePropertyBuilderProps, ValuesCountEntry } from "../types";
|
|
3
|
+
import { findCommonInitialStringInPath } from "../strings";
|
|
4
|
+
import { extractEnumFromValues } from "../util";
|
|
5
|
+
|
|
6
|
+
// File extensions (including the leading dot) used to guess the kind of file
// a string value points to. ".jpeg" is listed next to ".jpg" because both
// spellings are in common use for the same format.
const IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".webp", ".gif"];
const AUDIO_EXTENSIONS = [".mp3", ".ogg", ".opus", ".aac"];
const VIDEO_EXTENSIONS = [".avi", ".mp4"];

// Matches a whole string that looks like an email address: a local part,
// an "@", and one or more dot-separated domain labels of at most 63 characters.
const emailRegEx = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
/**
 * Infers the configuration of a string property from the values sampled
 * for a field.
 *
 * Each heuristic applies when more than two thirds of `totalDocsCount`
 * values match it:
 * - values starting with "http" mark the property as `url`
 * - values matching `emailRegEx` mark it as `email`
 * - 28-character values without spaces mark it as `readOnly`
 *
 * When none of those apply and there are fewer distinct values than a third
 * of the sampled entries, enum values are extracted. If no enum was set
 * either, the values are checked for a probable file type and a `storage`
 * configuration is generated.
 *
 * @param totalDocsCount number of documents analysed
 * @param valuesResult values sampled for the field, if any
 * @returns a property with `dataType: "string"` plus any inferred settings
 */
export function buildStringProperty({
    totalDocsCount,
    valuesResult
}: InferencePropertyBuilderProps): Property {

    const plainString: Property = {
        dataType: "string"
    };

    // Nothing was sampled for this field: keep the bare string property
    if (!valuesResult) {
        return plainString;
    }

    const sampledValues = valuesResult.values;
    const distinctCount = Array.from(valuesResult.valuesCount.keys()).length;
    // A heuristic wins when more than two thirds of the documents match it
    const majority = totalDocsCount / 3 * 2;

    const countStrings = (matches: (text: string) => boolean): number =>
        sampledValues.filter((candidate) => typeof candidate === "string" && matches(candidate)).length;

    const looksLikeUrl = countStrings((text) => text.startsWith("http")) > majority;
    const looksLikeEmail = countStrings((text) => emailRegEx.test(text)) > majority;
    const looksLikeUserId = countStrings((text) => text.length === 28 && !text.includes(" ")) > majority;
    const hasSpecificFormat = looksLikeEmail || looksLikeUrl || looksLikeUserId;

    const config: Partial<StringProperty> = {};
    if (looksLikeUrl) {
        config.url = true;
    }
    if (looksLikeEmail) {
        config.email = true;
    }
    if (looksLikeUserId) {
        config.readOnly = true;
    }

    // Few distinct values compared to the number of entries: probably an enum
    if (!hasSpecificFormat && distinctCount < sampledValues.length / 3) {
        const enumValues = extractEnumFromValues(Array.from(valuesResult.valuesCount.keys()));
        if (Object.keys(enumValues).length > 1) {
            config.enumValues = enumValues;
        }
    }

    // Regular string: check whether the values look like stored files
    if (!hasSpecificFormat && !config.enumValues) {
        const fileType = probableFileType(valuesResult, totalDocsCount);
        if (fileType) {
            config.storage = {
                acceptedFiles: [fileType as FileType],
                storagePath: findCommonInitialStringInPath(valuesResult) ?? "/"
            };
        }
    }

    return Object.keys(config).length > 0
        ? { ...plainString, ...config }
        : plainString;
}
|
|
86
|
+
|
|
87
|
+
// TODO: support returning multiple types
/**
 * Guesses the kind of file the sampled string values point to, based on
 * their extension.
 *
 * A kind is selected when more than two thirds of `totalDocsCount` values
 * end with one of its extensions. Matching is case-insensitive, so
 * "photo.JPG" counts as an image just like "photo.jpg". Images take
 * precedence over audio, and audio over video.
 *
 * @param valuesCount values sampled for the field
 * @param totalDocsCount number of documents analysed
 * @returns "image/*", "audio/*" or "video/*", or `false` when no kind
 * reaches the threshold
 */
function probableFileType(valuesCount: ValuesCountEntry, totalDocsCount: number): boolean | FileType {
    const majority = totalDocsCount / 3 * 2;

    // Lower-case once so every extension list can be checked against the same values
    const lowerCasedStrings: string[] = valuesCount.values
        .filter((value): value is string => typeof value === "string")
        .map((value) => value.toLowerCase());

    const mostlyEndsWith = (extensions: string[]): boolean =>
        lowerCasedStrings
            .filter((value) => extensions.some((extension) => value.endsWith(extension)))
            .length > majority;

    if (mostlyEndsWith(IMAGE_EXTENSIONS)) return "image/*";
    if (mostlyEndsWith(AUDIO_EXTENSIONS)) return "audio/*";
    if (mostlyEndsWith(VIDEO_EXTENSIONS)) return "video/*";
    return false;
}
|
package/src/builders/validation_builder.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { PropertyValidationSchema } from "firecms";
|
|
2
|
+
import { InferencePropertyBuilderProps } from "../types";
|
|
3
|
+
|
|
4
|
+
/**
 * Infers the validation schema of a property from its sampled values.
 *
 * The property is marked as required when the number of sampled values
 * equals the number of analysed documents.
 *
 * @param totalDocsCount number of documents analysed
 * @param valuesResult values sampled for the field, if any
 * @returns `{ required: true }` or `undefined` when nothing can be inferred
 */
export function buildValidation({
    totalDocsCount,
    valuesResult
}: InferencePropertyBuilderProps): PropertyValidationSchema | undefined {
    if (!valuesResult) {
        return undefined;
    }
    const presentInEveryDocument = valuesResult.values.length === totalDocsCount;
    return presentInEveryDocument ? { required: true } : undefined;
}
|
package/src/collection_builder.ts
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DataType,
|
|
3
|
+
EnumValues,
|
|
4
|
+
mergeDeep,
|
|
5
|
+
Properties,
|
|
6
|
+
Property,
|
|
7
|
+
resolveEnumValues,
|
|
8
|
+
StringProperty,
|
|
9
|
+
unslugify
|
|
10
|
+
} from "firecms";
|
|
11
|
+
import {
|
|
12
|
+
InferencePropertyBuilderProps,
|
|
13
|
+
TypesCount,
|
|
14
|
+
TypesCountRecord,
|
|
15
|
+
ValuesCountEntry,
|
|
16
|
+
ValuesCountRecord
|
|
17
|
+
} from "./types";
|
|
18
|
+
import { buildStringProperty } from "./builders/string_property_builder";
|
|
19
|
+
import { buildValidation } from "./builders/validation_builder";
|
|
20
|
+
import { buildReferenceProperty } from "./builders/reference_property_builder";
|
|
21
|
+
import { extractEnumFromValues } from "./util";
|
|
22
|
+
|
|
23
|
+
export type InferenceTypeBuilder = (value: any) => DataType;
|
|
24
|
+
|
|
25
|
+
/**
 * Infers the properties of a collection from a sample of its documents.
 *
 * Every key of every non-empty entry is registered in two accumulators, one
 * counting the data types seen per key and one collecting the values seen
 * per key. The properties are then built from those counts.
 *
 * @param data sampled documents; falsy entries are ignored
 * @param getType callback resolving the data type of a raw value
 * @returns the inferred properties, indexed by key
 */
export async function buildEntityPropertiesFromData(data: object[], getType: InferenceTypeBuilder): Promise<Properties> {
    const typesCount: TypesCountRecord = {};
    const valuesCount: ValuesCountRecord = {};

    for (const entry of data) {
        if (!entry) continue;
        for (const [key, value] of Object.entries(entry)) {
            increaseMapTypeCount(typesCount, key, value, getType);
            increaseValuesCount(valuesCount, key, value, getType);
        }
    }

    return buildPropertiesFromCount(data.length, typesCount, valuesCount);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Refines an existing property using a sample of the values stored for it.
 *
 * When the property declares `enumValues`, the enum entries extracted from
 * the sampled values are prepended to the declared ones. Otherwise a
 * property is inferred from the sample and the given property is deep-merged
 * into it.
 *
 * An empty `data` array is supported: no values are recorded in that case,
 * so the enum branch falls back to an empty list of observed values instead
 * of throwing.
 *
 * @param data sampled values of the property
 * @param property the property as currently defined
 * @param getType callback resolving the data type of a raw value
 * @returns the refined property
 */
export function buildPropertyFromData(data: any[], property: Property, getType: InferenceTypeBuilder): Property {
    const inferredKey = "inferred_prop";
    const typesCount: TypesCount = {};
    const valuesCount: ValuesCountRecord = {};
    data.forEach((entry) => {
        increaseTypeCount(property.dataType, typesCount, entry, getType);
        increaseValuesCount(valuesCount, inferredKey, entry, getType);
    });

    // Only populated once at least one entry has been processed
    const inferredValues: ValuesCountEntry | undefined = valuesCount[inferredKey];

    const enumValues = "enumValues" in property ? resolveEnumValues(property["enumValues"] as EnumValues) : undefined;
    if (enumValues) {
        const observedValues = inferredValues ? Array.from(inferredValues.valuesCount.keys()) : [];
        // NOTE(review): values already declared in the property may show up twice
        // in the result if they are also present in the data - confirm whether
        // extractEnumFromValues output should be de-duplicated against enumValues.
        const newEnumValues = extractEnumFromValues(observedValues);
        return { ...property, enumValues: [...newEnumValues, ...enumValues] } as StringProperty;
    }

    const generatedProperty = buildPropertyFromCount(inferredKey, data.length, property.dataType, typesCount, inferredValues);
    return mergeDeep(generatedProperty, property);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Computes a display order for the keys of a set of properties.
 *
 * Keys are ranked by how likely they are to identify the entity:
 * exactly "title"/"name" first, then keys containing "title" or "name",
 * then keys containing "image" or "picture", then everything else. The
 * match is case-insensitive. Keys with the same rank are ordered by plain
 * string comparison.
 *
 * Both criteria are applied in one comparator, so the result does not
 * depend on the sort algorithm being stable.
 *
 * @param properties the properties to order
 * @returns the property keys, most relevant first
 */
export function buildPropertiesOrder(properties: Properties<any>): string [] {
    const rankOf = (key: string): number => {
        const lowered = key.toLowerCase();
        if (lowered === "title" || lowered === "name") return 3;
        if (lowered.includes("title") || lowered.includes("name")) return 2;
        if (lowered.includes("image") || lowered.includes("picture")) return 1;
        return 0;
    };

    return Object.keys(properties)
        // Rank each key once instead of on every comparison
        .map((key) => ({ key, rank: rankOf(key) }))
        .sort((a, b) => {
            if (a.rank !== b.rank) return b.rank - a.rank;
            // Same ordering as the default Array.prototype.sort for strings
            if (a.key < b.key) return -1;
            if (a.key > b.key) return 1;
            return 0;
        })
        .map((ranked) => ranked.key);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Registers one observed value of the given data type in the type counters
 * of a field.
 *
 * - "map": each child key of the value is registered in the nested record
 *   stored under `typesCount.map` (falsy values are ignored).
 * - "array": the most probable element type of the array is counted in the
 *   counters stored under `typesCount.array` (empty arrays are not counted).
 * - any other type: the plain counter stored under the type is incremented.
 *
 * @param type data type resolved for the value
 * @param typesCount counters of the field, updated in place
 * @param fieldValue the observed value
 * @param getType callback resolving the data type of a raw value
 */
function increaseTypeCount(type: DataType, typesCount: TypesCount, fieldValue: any, getType: InferenceTypeBuilder) {
    switch (type) {
        case "map": {
            if (!fieldValue) return;
            let nestedRecord = typesCount[type];
            if (!nestedRecord) {
                nestedRecord = {};
                typesCount[type] = nestedRecord;
            }
            for (const [childKey, childValue] of Object.entries(fieldValue)) {
                increaseMapTypeCount(nestedRecord as TypesCountRecord, childKey, childValue, getType);
            }
            return;
        }
        case "array": {
            // The counters object is created even when the array turns out to be empty
            let elementCounts = typesCount[type];
            if (!elementCounts) {
                elementCounts = {};
                typesCount[type] = elementCounts;
            }
            const hasElements = fieldValue && Array.isArray(fieldValue) && fieldValue.length > 0;
            if (!hasElements) return;
            // Most frequent element type across the whole array
            const dominantType = getMostProbableTypeInArray(fieldValue, getType);
            const seenSoFar = elementCounts[dominantType] as number;
            (elementCounts[dominantType] as number) = seenSoFar ? seenSoFar + 1 : 1;
            return;
        }
        default: {
            const seenSoFar = typesCount[type] as number;
            typesCount[type] = seenSoFar ? seenSoFar + 1 : 1;
        }
    }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Registers the value found under `key` in a record of per-key type counters.
 *
 * The counters for the key are created on first use, even when the value is
 * null or undefined; such values are otherwise not counted.
 *
 * @param typesCountRecord per-key counters, updated in place
 * @param key key the value was found under
 * @param fieldValue the observed value
 * @param getType callback resolving the data type of a raw value
 */
function increaseMapTypeCount(
    typesCountRecord: TypesCountRecord,
    key: string,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {
    if (!typesCountRecord[key]) {
        typesCountRecord[key] = {};
    }
    const keyCounters: TypesCount = typesCountRecord[key];

    // Missing values carry no type information
    if (fieldValue == null) return;

    increaseTypeCount(getType(fieldValue), keyCounters, fieldValue, getType);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Registers the value found under `key` in a record of per-key sampled values.
 *
 * - map values are registered recursively, key by key, in the entry's
 *   `mapValues` record, which is the field `buildPropertyFromCount` reads
 *   when it builds nested properties;
 * - array values contribute each of their elements;
 * - any other value is stored as is, unless it is null, undefined or an
 *   empty string. `0` and `false` are real values and are counted.
 *
 * For every stored value the list of values and the per-value occurrence
 * counter are both updated.
 *
 * @param typeValuesRecord per-key sampled values, updated in place
 * @param key key the value was found under
 * @param fieldValue the observed value
 * @param getType callback resolving the data type of a raw value
 */
function increaseValuesCount(
    typeValuesRecord: ValuesCountRecord,
    key: string,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {

    const dataType = getType(fieldValue);

    let valuesRecord: ValuesCountEntry = typeValuesRecord[key];
    if (!valuesRecord) {
        valuesRecord = {
            values: [],
            valuesCount: new Map()
        };
        typeValuesRecord[key] = valuesRecord;
    }
    const entry = valuesRecord;

    const recordValue = (value: any): void => {
        entry.values.push(value);
        entry.valuesCount.set(value, (entry.valuesCount.get(value) ?? 0) + 1);
    };

    if (dataType === "map") {
        let mapValuesRecord: ValuesCountRecord | undefined = entry.mapValues;
        if (!mapValuesRecord) {
            mapValuesRecord = {};
            entry.mapValues = mapValuesRecord;
        }
        if (fieldValue) {
            for (const [childKey, childValue] of Object.entries(fieldValue)) {
                increaseValuesCount(mapValuesRecord, childKey, childValue, getType);
            }
        }
    } else if (dataType === "array") {
        if (Array.isArray(fieldValue)) {
            fieldValue.forEach(recordValue);
        }
    } else if (fieldValue !== null && fieldValue !== undefined && fieldValue !== "") {
        // Explicit checks instead of truthiness so that 0 and false are kept
        recordValue(fieldValue);
    }

}
|
|
173
|
+
|
|
174
|
+
/**
 * Resolves the number of occurrences represented by one entry of a
 * `TypesCount`.
 *
 * Plain numeric counters are returned as is, whatever the type key. This
 * matters for the counters of array elements, where the element type
 * ("map" or "array" included) is stored with a numeric count. Nested
 * counters found under "map" or "array" are resolved to their highest count.
 */
function resolveTypeEntryCount(type: string, count: unknown): number {
    if (typeof count === "number") return count;
    if (type === "map") return getHighestRecordCount(count as TypesCountRecord);
    if (type === "array") return getHighestTypesCount(count as TypesCount);
    return count as number;
}

/**
 * Returns the highest count found among the entries of a `TypesCount`,
 * or 0 when it has no entries.
 */
function getHighestTypesCount(typesCount: TypesCount): number {
    let highestCount = 0;
    for (const [type, count] of Object.entries(typesCount)) {
        const countValue = resolveTypeEntryCount(type, count);
        if (countValue > highestCount) {
            highestCount = countValue;
        }
    }
    return highestCount;
}

/**
 * Returns the highest count found among all the keys of a record of type
 * counters, or 0 when the record is empty.
 */
function getHighestRecordCount(record: TypesCountRecord): number {
    return Object.values(record)
        .map((typesCount) => getHighestTypesCount(typesCount))
        .reduce((a, b) => Math.max(a, b), 0);
}

/**
 * Returns the data type with the highest count in a `TypesCount`.
 *
 * The first type wins on ties, and "string" is returned when there are no
 * entries at all.
 */
function getMostProbableType(typesCount: TypesCount): DataType {
    let highestCount = -1;
    let probableType: DataType = "string"; //default
    for (const [type, count] of Object.entries(typesCount)) {
        const countValue = resolveTypeEntryCount(type, count);
        if (countValue > highestCount) {
            highestCount = countValue;
            probableType = type as DataType;
        }
    }
    return probableType;
}
|
|
218
|
+
|
|
219
|
+
/**
 * Builds the property of a field from its accumulated type counters and
 * sampled values.
 *
 * - "map": produces a key/value map with no child properties when the child
 *   keys show high variability, otherwise a map whose child properties are
 *   inferred recursively.
 * - "array": produces an array whose `of` property is inferred from the
 *   most probable element type.
 * - anything else: delegates to the string or reference builder, or emits a
 *   bare property of that type, and then attaches the name and the inferred
 *   validation.
 *
 * @param key key of the field; used to derive the property name
 * @param totalDocsCount number of documents analysed
 * @param mostProbableType data type selected for the field
 * @param typesCount type counters of the field
 * @param valuesResult values sampled for the field, if any
 * @returns the inferred property
 */
function buildPropertyFromCount(key: string, totalDocsCount: number, mostProbableType: DataType, typesCount: TypesCount, valuesResult?: ValuesCountEntry): Property {
    const title: string | undefined = key ? unslugify(key) : undefined;

    if (mostProbableType === "map") {
        // Too many rarely used keys: treat the map as free-form key/value data
        if (checkTypesCountHighVariability(typesCount)) {
            return {
                dataType: "map",
                name: title,
                keyValue: true,
                properties: {}
            };
        }
        return {
            dataType: "map",
            name: title,
            properties: buildPropertiesFromCount(totalDocsCount, typesCount.map as TypesCountRecord, valuesResult?.mapValues)
        };
    }

    if (mostProbableType === "array") {
        const elementTypesCount = typesCount.array as TypesCount;
        const elementType = getMostProbableType(elementTypesCount);
        return {
            dataType: "array",
            name: title,
            of: buildPropertyFromCount(key, totalDocsCount, elementType, elementTypesCount, valuesResult)
        };
    }

    const builderProps: InferencePropertyBuilderProps = {
        name: key,
        totalDocsCount,
        valuesResult
    };

    const buildScalarProperty = (): Property => {
        switch (mostProbableType) {
            case "string":
                return buildStringProperty(builderProps);
            case "reference":
                return buildReferenceProperty(builderProps);
            default:
                return { dataType: mostProbableType } as Property;
        }
    };

    const result = buildScalarProperty();
    if (title) {
        result.name = title;
    }

    const validation = buildValidation(builderProps);
    if (validation) {
        result.validation = validation;
    }

    return result;
}
|
|
281
|
+
|
|
282
|
+
/**
 * Builds one property per key of a record of type counters.
 *
 * For each key the most probable data type is selected and the property is
 * built from the key's counters and, when available, its sampled values.
 *
 * @param totalDocsCount number of documents analysed
 * @param typesCountRecord type counters, indexed by key
 * @param valuesCountRecord sampled values, indexed by key (optional)
 * @returns the inferred properties, indexed by key
 */
function buildPropertiesFromCount(totalDocsCount: number, typesCountRecord: TypesCountRecord, valuesCountRecord?: ValuesCountRecord): Properties {
    const inferred: Properties = {};
    for (const [key, typesCount] of Object.entries(typesCountRecord)) {
        const sampledValues = valuesCountRecord ? valuesCountRecord[key] : undefined;
        inferred[key] = buildPropertyFromCount(key, totalDocsCount, getMostProbableType(typesCount), typesCount, sampledValues);
    }
    return inferred;
}
|
|
290
|
+
|
|
291
|
+
/**
 * Returns the largest numeric counter found anywhere inside a (possibly
 * nested) structure of type counters, or 0 when there is none.
 *
 * Object entries are explored recursively; any other entry is treated as a
 * counter.
 */
function countMaxDocumentsUnder(typesCount: TypesCount) {
    let highest = 0;
    for (const entry of Object.values(typesCount)) {
        const isNested = typeof entry === "object";
        const candidate = isNested
            ? countMaxDocumentsUnder(entry as TypesCountRecord)
            : (entry as number);
        highest = Math.max(highest, candidate);
    }
    return highest;
}
|
|
303
|
+
|
|
304
|
+
/**
 * Returns the most probable data type of the elements of an array.
 *
 * Every element is registered in a fresh set of type counters, and the type
 * with the highest count is returned.
 *
 * @param array the array to inspect
 * @param getType callback resolving the data type of a raw value
 */
function getMostProbableTypeInArray(array: any[], getType: InferenceTypeBuilder): DataType {
    const elementCounters: TypesCount = {};
    array.forEach((element) => {
        const elementType = getType(element);
        increaseTypeCount(elementType, elementCounters, element, getType);
    });
    return getMostProbableType(elementCounters);
}
|
|
311
|
+
|
|
312
|
+
/**
 * Tells whether the child keys of a map field vary a lot between documents.
 *
 * A child key has "few values" when its highest counter is below a third of
 * the highest counter found in the whole structure. Variability is high when
 * more than half of the child keys have few values. With no child keys the
 * ratio is not a number and the result is `false`.
 */
function checkTypesCountHighVariability(typesCount: TypesCount) {
    const maxCount = countMaxDocumentsUnder(typesCount);
    const childCounters = Object.values(typesCount.map ?? {});
    const keysWithFewValues = childCounters
        .filter((counters) => countMaxDocumentsUnder(counters) < maxCount / 3)
        .length;
    return keysWithFewValues / childCounters.length > 0.5;
}
|
|
324
|
+
|
package/src/index.ts
ADDED
package/src/strings.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { ValuesCountEntry } from "./types";
|
|
2
|
+
import { DocumentReference } from "firebase/firestore";
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
/**
 * Finds the parent path shared by most of the values sampled for a field.
 *
 * Values can be plain strings or objects exposing a string `path` (such as
 * document references). The candidate is the parent path of the first value
 * containing a "/", i.e. everything before its last "/". It is returned when
 * more than two thirds of all sampled values are that path or lie below it.
 * A value only counts as lying below the candidate when the candidate is
 * followed by a "/", so "users_old/x" is not attributed to "users".
 *
 * @param valuesCount values sampled for the field, if any
 * @returns the shared parent path, or `undefined` when there is none
 */
export function findCommonInitialStringInPath(valuesCount?: ValuesCountEntry) {

    if (!valuesCount) return undefined;

    function getPath(value: any): string | undefined {
        if (typeof value === "string") return value;
        // Duck-typed so that any reference-like object with a string path is supported
        if (value !== null && typeof value === "object" && typeof value.path === "string") return value.path;
        return undefined;
    }

    const paths = valuesCount.values.map((value) => getPath(value));
    const pathWithSlash = paths.find((path): path is string => !!path && path.includes("/"));
    if (!pathWithSlash)
        return undefined;

    const searchedPath = pathWithSlash.slice(0, pathWithSlash.lastIndexOf("/"));
    const childPrefix = searchedPath + "/";

    const matchingCount = paths
        .filter((path) => !!path && (path === searchedPath || path.startsWith(childPrefix)))
        .length;

    return matchingCount > valuesCount.values.length / 3 * 2 ? searchedPath : undefined;

}
|
|
32
|
+
|
|
33
|
+
/**
 * Removes one leading and one trailing "/" from a string, when present.
 */
export function removeInitialAndTrailingSlashes(s: string): string {
    const withoutTrailing = removeTrailingSlash(s);
    return removeInitialSlash(withoutTrailing);
}

/**
 * Removes a single leading "/" from a string, when present.
 */
export function removeInitialSlash(s: string) {
    return s.startsWith("/") ? s.slice(1) : s;
}

/**
 * Removes a single trailing "/" from a string, when present.
 */
export function removeTrailingSlash(s: string) {
    return s.endsWith("/") ? s.slice(0, -1) : s;
}
|
package/src/test_schemas/test_schema.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { buildEntityPropertiesFromData } from "../collection_builder";
|
|
2
|
+
import { DataType } from "firecms";
|
|
3
|
+
|
|
4
|
+
// Sample documents used to try the inference by hand. Only the `type`
// attribute is declared on the JSON import: other attribute keys are not
// supported by the module loader.
import usage from "./usage.json" assert {
    type: 'json'
};
import * as util from "util";

// Infer the properties of the sample data and print them in full depth.
// A failure is reported and reflected in the exit code instead of being
// left as an unhandled rejection.
buildEntityPropertiesFromData(usage, getType)
    .then((res) => console.log(util.inspect(res, { showHidden: false, depth: null, colors: true })))
    .catch((error: unknown) => {
        console.error("Schema inference failed", error);
        process.exitCode = 1;
    });
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
/**
 * Resolves the data type of a raw value from the sample data.
 *
 * Numbers, strings and booleans map to the type of the same name and arrays
 * to "array". Values with `_seconds` and `_nanoseconds` keys are dates, and
 * values with `id` and `path` keys are references. Anything else, including
 * null and undefined, is reported as "map".
 */
function getType(value: any): DataType {
    switch (typeof value) {
        case "number":
            return "number";
        case "string":
            return "string";
        case "boolean":
            return "boolean";
    }

    if (Array.isArray(value)) {
        return "array";
    }

    const hasKeys = (...keys: string[]): boolean => keys.every((key) => key in value);

    if (value && hasKeys("_seconds", "_nanoseconds")) {
        return "date";
    }
    if (value && hasKeys("id", "path")) {
        return "reference";
    }
    return "map";
}
|