@firecms/schema_inference 3.0.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 FireCMS
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1 @@
1
+ # schema_inference
package/package.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "version": "3.0.0-alpha.4",
3
+ "name": "@firecms/schema_inference",
4
+ "access": "public",
5
+ "packageManager": "yarn@3.2.3",
6
+ "type": "module",
7
+ "main": "./dist/index.umd.js",
8
+ "module": "./dist/index.es.js",
9
+ "types": "dist/index.d.ts",
10
+ "source": "src/index.ts",
11
+ "exports": {
12
+ ".": {
13
+ "import": "./dist/index.es.js",
14
+ "require": "./dist/index.umd.js"
15
+ }
16
+ },
17
+ "dependencies": {
18
+ "@types/node": "^20.5.9",
19
+ "firecms": "^3.0.0-alpha.4",
20
+ "typescript": "^5.2.2"
21
+ },
22
+ "scripts": {
23
+ "dev": "vite",
24
+ "build": "vite build && tsc --emitDeclarationOnly"
25
+ },
26
+ "gitHead": "17bba2feea1f6c818c0d9d4b3d6c8e4dfd4e5b4b"
27
+ }
@@ -0,0 +1,16 @@
1
+ import { InferencePropertyBuilderProps } from "../types";
2
+ import { findCommonInitialStringInPath } from "../strings";
3
+ import { Property } from "firecms";
4
+
5
/**
 * Builds a `reference` property from the values sampled for a field.
 *
 * The target path is whatever `findCommonInitialStringInPath` reports for the
 * sampled values; when it reports nothing, the placeholder `"!!!FIX_ME!!!"`
 * is used so the missing path is easy to spot.
 *
 * @param totalDocsCount number of documents analysed (not used by this builder)
 * @param valuesResult values collected for the field, if any
 * @returns the inferred reference property
 */
export function buildReferenceProperty({
                                           totalDocsCount,
                                           valuesResult
                                       }: InferencePropertyBuilderProps): Property {
    const inferredPath = findCommonInitialStringInPath(valuesResult);
    const referenceProperty: Property = {
        dataType: "reference",
        path: inferredPath ?? "!!!FIX_ME!!!"
    };
    return referenceProperty;
}
@@ -0,0 +1,107 @@
1
+ import { FileType, Property, StringProperty, unslugify } from "firecms";
2
+ import { InferencePropertyBuilderProps, ValuesCountEntry } from "../types";
3
+ import { findCommonInitialStringInPath } from "../strings";
4
+ import { extractEnumFromValues } from "../util";
5
+
6
+ const IMAGE_EXTENSIONS = [".jpg", ".png", ".webp", ".gif"];
7
+ const AUDIO_EXTENSIONS = [".mp3", ".ogg", ".opus", ".aac"];
8
+ const VIDEO_EXTENSIONS = [".avi", ".mp4"];
9
+
10
+ const emailRegEx = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;
11
+
12
+
13
/**
 * Builds a `string` property, enriching it with whatever can be guessed from
 * the sampled values.
 *
 * A heuristic applies when more than two thirds of `totalDocsCount` values
 * match it:
 * - values starting with `http` mark the property as `url`
 * - values matching the email expression mark it as `email`
 * - 28-character strings without spaces mark it as `readOnly`
 *
 * When none of those apply, the property may become an enum (fewer distinct
 * values than a third of the sampled entries, and more than one enum value
 * extracted) or, failing that, a storage field if the values look like files.
 *
 * @param totalDocsCount number of documents analysed
 * @param valuesResult values collected for the field, if any
 * @returns the inferred string property
 */
export function buildStringProperty({
                                        totalDocsCount,
                                        valuesResult
                                    }: InferencePropertyBuilderProps): Property {

    const baseProperty: Property = {
        dataType: "string"
    };

    if (!valuesResult) {
        return baseProperty;
    }

    const sampledValues = valuesResult.values;
    const distinctValues = Array.from(valuesResult.valuesCount.keys());
    const threshold = totalDocsCount / 3 * 2;

    // True when more than two thirds of the documents hold a string accepted by `predicate`
    const mostlyMatches = (predicate: (text: string) => boolean): boolean =>
        sampledValues.filter((value) => typeof value === "string" && predicate(value)).length > threshold;

    const looksLikeUrl = mostlyMatches((text) => text.startsWith("http"));
    const looksLikeEmail = mostlyMatches((text) => emailRegEx.test(text));
    const looksLikeUserId = mostlyMatches((text) => text.length === 28 && !text.includes(" "));

    const inferred: Partial<StringProperty> = {};
    if (looksLikeUrl) {
        inferred.url = true;
    }
    if (looksLikeEmail) {
        inferred.email = true;
    }
    if (looksLikeUserId) {
        inferred.readOnly = true;
    }

    const isPlainText = !(looksLikeEmail || looksLikeUrl || looksLikeUserId);

    // few distinct values compared to the number of entries: probably an enum
    if (isPlainText && distinctValues.length < sampledValues.length / 3) {
        const enumValues = extractEnumFromValues(distinctValues);
        if (Object.keys(enumValues).length > 1) {
            inferred.enumValues = enumValues;
        }
    }

    // regular string: check whether the values look like stored files
    if (isPlainText && !inferred.enumValues) {
        const fileType = probableFileType(valuesResult, totalDocsCount);
        if (fileType) {
            inferred.storage = {
                acceptedFiles: [fileType as FileType],
                storagePath: findCommonInitialStringInPath(valuesResult) ?? "/"
            };
        }
    }

    if (Object.keys(inferred).length === 0) {
        return baseProperty;
    }

    const enrichedProperty: Property = {
        ...baseProperty,
        ...inferred
    };
    return enrichedProperty;
}
86
+
87
// TODO: support returning multiple types
/**
 * Guesses the kind of file referenced by the sampled values.
 *
 * A kind is picked when more than two thirds of `totalDocsCount` values are
 * strings ending with one of its extensions. Images take precedence over
 * audio, and audio over video.
 *
 * @param valuesCount values collected for the field
 * @param totalDocsCount number of documents analysed
 * @returns `"image/*"`, `"audio/*"`, `"video/*"`, or `false` when none applies
 */
function probableFileType(valuesCount: ValuesCountEntry, totalDocsCount: number): boolean | FileType {
    const threshold = totalDocsCount / 3 * 2;

    const mostlyEndsWith = (extensions: string[]): boolean => {
        let matches = 0;
        for (const value of valuesCount.values) {
            if (typeof value === "string" && extensions.some((extension) => value.endsWith(extension))) {
                matches++;
            }
        }
        return matches > threshold;
    };

    if (mostlyEndsWith(IMAGE_EXTENSIONS)) {
        return "image/*";
    }
    if (mostlyEndsWith(AUDIO_EXTENSIONS)) {
        return "audio/*";
    }
    if (mostlyEndsWith(VIDEO_EXTENSIONS)) {
        return "video/*";
    }
    return false;
}
@@ -0,0 +1,18 @@
1
+ import { PropertyValidationSchema } from "firecms";
2
+ import { InferencePropertyBuilderProps } from "../types";
3
+
4
/**
 * Infers the validation schema of a property.
 *
 * The property is flagged as `required` when the number of collected values
 * equals the number of analysed documents.
 *
 * @param totalDocsCount number of documents analysed
 * @param valuesResult values collected for the field, if any
 * @returns `{ required: true }` or `undefined` when nothing can be inferred
 */
export function buildValidation({
                                    totalDocsCount,
                                    valuesResult
                                }: InferencePropertyBuilderProps): PropertyValidationSchema | undefined {
    if (!valuesResult) {
        return undefined;
    }

    const presentInEveryDocument = valuesResult.values.length === totalDocsCount;
    return presentInEveryDocument ? { required: true } : undefined;
}
@@ -0,0 +1,324 @@
1
+ import {
2
+ DataType,
3
+ EnumValues,
4
+ mergeDeep,
5
+ Properties,
6
+ Property,
7
+ resolveEnumValues,
8
+ StringProperty,
9
+ unslugify
10
+ } from "firecms";
11
+ import {
12
+ InferencePropertyBuilderProps,
13
+ TypesCount,
14
+ TypesCountRecord,
15
+ ValuesCountEntry,
16
+ ValuesCountRecord
17
+ } from "./types";
18
+ import { buildStringProperty } from "./builders/string_property_builder";
19
+ import { buildValidation } from "./builders/validation_builder";
20
+ import { buildReferenceProperty } from "./builders/reference_property_builder";
21
+ import { extractEnumFromValues } from "./util";
22
+
23
+ export type InferenceTypeBuilder = (value: any) => DataType;
24
+
25
/**
 * Infers the properties of a collection from a sample of its documents.
 *
 * Every key of every non-empty document feeds two accumulators: one counting
 * the data types seen per key and one collecting the values seen per key.
 * The properties are then built from those accumulators.
 *
 * @param data sample documents; falsy entries are skipped
 * @param getType callback resolving the data type of a value
 * @returns a promise resolving to the inferred properties
 */
export async function buildEntityPropertiesFromData(data: object[], getType: InferenceTypeBuilder): Promise<Properties> {
    const typesByKey: TypesCountRecord = {};
    const valuesByKey: ValuesCountRecord = {};

    for (const document of data) {
        if (!document) {
            continue;
        }
        for (const [key, value] of Object.entries(document)) {
            increaseMapTypeCount(typesByKey, key, value, getType);
            increaseValuesCount(valuesByKey, key, value, getType);
        }
    }

    return buildPropertiesFromCount(data.length, typesByKey, valuesByKey);
}
39
+
40
/**
 * Refines an existing property using sample values of that property.
 *
 * - If the property declares `enumValues`, the enum values extracted from the
 *   sampled data are prepended to the resolved existing ones.
 * - Otherwise a property is generated from the sampled data and the given
 *   property is deep-merged on top of it.
 *
 * @param data sample values of the property
 * @param property the property as currently configured
 * @param getType callback resolving the data type of a value
 * @returns the refined property
 */
export function buildPropertyFromData(data: any[], property: Property, getType: InferenceTypeBuilder): Property {
    const typesCount: TypesCount = {};
    const valuesCount: ValuesCountRecord = {};
    data.forEach((entry) => {
        increaseTypeCount(property.dataType, typesCount, entry, getType);
        increaseValuesCount(valuesCount, "inferred_prop", entry, getType);
    });

    // Stays undefined when `data` is empty, since no entry was ever recorded
    const inferredValues: ValuesCountEntry | undefined = valuesCount["inferred_prop"];

    const enumValues = "enumValues" in property ? resolveEnumValues(property["enumValues"] as EnumValues) : undefined;
    if (enumValues) {
        // Guard against the empty-sample case instead of dereferencing undefined
        const newEnumValues = inferredValues
            ? extractEnumFromValues(Array.from(inferredValues.valuesCount.keys()))
            : [];
        return { ...property, enumValues: [...newEnumValues, ...enumValues] } as StringProperty;
    }

    const generatedProperty = buildPropertyFromCount("inferred_prop", data.length, property.dataType, typesCount, inferredValues);
    return mergeDeep(generatedProperty, property);
}
56
+
57
/**
 * Computes a display order for the keys of the given properties.
 *
 * Keys are ranked by relevance (case-insensitive):
 * exactly `title`/`name` first, then keys containing `title`/`name`, then
 * keys containing `image`/`picture`, then everything else. Keys with the same
 * rank are sorted alphabetically.
 *
 * @param properties properties whose keys should be ordered
 * @returns the ordered list of keys
 */
export function buildPropertiesOrder(properties: Properties<any>): string [] {
    const rankOf = (key: string): number => {
        const lowered = key.toLowerCase();
        if (lowered === "title" || lowered === "name") return 3;
        if (lowered.includes("title") || lowered.includes("name")) return 2;
        if (lowered.includes("image") || lowered.includes("picture")) return 1;
        return 0;
    };

    const orderedKeys = Object.keys(properties);
    orderedKeys.sort((left, right) => {
        const rankDifference = rankOf(right) - rankOf(left);
        if (rankDifference !== 0) return rankDifference;
        // same rank: fall back to plain string ordering
        if (left < right) return -1;
        if (left > right) return 1;
        return 0;
    });
    return orderedKeys;
}
73
+
74
/**
 * Records one occurrence of `type` in the given counter.
 *
 * - `map`: when the value is truthy, each of its entries is counted in the
 *   nested record stored under `map`.
 * - `array`: the nested counter under `array` is created if missing; for a
 *   non-empty array, the most probable type of its elements is counted once.
 * - any other type: its numeric counter is incremented.
 *
 * @param type data type resolved for `fieldValue`
 * @param typesCount counter to update in place
 * @param fieldValue the value being counted
 * @param getType callback resolving the data type of a value
 */
function increaseTypeCount(type: DataType, typesCount: TypesCount, fieldValue: any, getType: InferenceTypeBuilder) {
    if (type === "map") {
        if (!fieldValue) {
            return;
        }
        let nestedRecord = typesCount[type];
        if (!nestedRecord) {
            nestedRecord = {};
            typesCount[type] = nestedRecord;
        }
        for (const [key, value] of Object.entries(fieldValue)) {
            increaseMapTypeCount(nestedRecord as TypesCountRecord, key, value, getType);
        }
        return;
    }

    if (type === "array") {
        let elementsCount = typesCount[type];
        if (!elementsCount) {
            elementsCount = {};
            typesCount[type] = elementsCount;
        }
        if (fieldValue && Array.isArray(fieldValue) && fieldValue.length > 0) {
            // most probable type among all the elements of the array
            const elementType = getMostProbableTypeInArray(fieldValue, getType);
            if (elementsCount[elementType]) (elementsCount[elementType] as number)++;
            else (elementsCount[elementType] as number) = 1;
        }
        return;
    }

    if (typesCount[type]) (typesCount[type] as number)++;
    else typesCount[type] = 1;
}
108
+
109
/**
 * Counts the type of `fieldValue` under `key` in a record of counters.
 *
 * The counter for `key` is created when missing, even if the value is
 * `null` or `undefined`; such values are not counted.
 *
 * @param typesCountRecord record of counters, updated in place
 * @param key key the value was found under
 * @param fieldValue the value being counted
 * @param getType callback resolving the data type of a value
 */
function increaseMapTypeCount(
    typesCountRecord: TypesCountRecord,
    key: string,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {
    if (!typesCountRecord[key]) {
        typesCountRecord[key] = {};
    }
    const keyTypesCount: TypesCount = typesCountRecord[key];

    // null and undefined carry no type information
    if (fieldValue === null || fieldValue === undefined) {
        return;
    }

    increaseTypeCount(getType(fieldValue), keyTypesCount, fieldValue, getType);
}
126
+
127
/**
 * Records `fieldValue` under `key` in a record of collected values.
 *
 * - `map`: the entries of the value are recorded recursively in the nested
 *   record stored under `mapValues`, which is the field
 *   `buildPropertyFromCount` reads when building nested properties.
 * - `array`: every element of the array is recorded.
 * - any other type: the value is recorded, unless it is `null`, `undefined`
 *   or an empty string. Present-but-falsy values such as `0` and `false`
 *   are recorded, so they count towards value statistics.
 *
 * The entry for `key` is created when missing, even if nothing is recorded.
 *
 * @param typeValuesRecord record of collected values, updated in place
 * @param key key the value was found under
 * @param fieldValue the value being recorded
 * @param getType callback resolving the data type of a value
 */
function increaseValuesCount(
    typeValuesRecord: ValuesCountRecord,
    key: string,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {

    const dataType = getType(fieldValue);

    let valuesRecord: ValuesCountEntry = typeValuesRecord[key];

    if (!valuesRecord) {
        valuesRecord = {
            values: [],
            valuesCount: new Map()
        };
        typeValuesRecord[key] = valuesRecord;
    }

    const entry = valuesRecord;
    const record = (value: any) => {
        entry.values.push(value);
        entry.valuesCount.set(value, (entry.valuesCount.get(value) ?? 0) + 1);
    };

    if (dataType === "map") {
        let mapValuesRecord: ValuesCountRecord | undefined = entry.mapValues;
        if (!mapValuesRecord) {
            mapValuesRecord = {};
            entry.mapValues = mapValuesRecord;
        }
        if (fieldValue) {
            for (const [nestedKey, nestedValue] of Object.entries(fieldValue)) {
                increaseValuesCount(mapValuesRecord, nestedKey, nestedValue, getType);
            }
        }
    } else if (dataType === "array") {
        if (Array.isArray(fieldValue)) {
            fieldValue.forEach(record);
        }
    } else {
        // Skip missing values and empty strings only; 0 and false are real values
        if (fieldValue != null && fieldValue !== "") {
            record(fieldValue);
        }
    }

}
173
+
174
/**
 * Returns the highest count found in a types counter.
 *
 * Nested `map` and `array` counters contribute the highest count found
 * inside them; every other entry contributes its own numeric count.
 *
 * @param typesCount counter to inspect
 * @returns the highest count, or 0 when the counter is empty
 */
function getHighestTypesCount(typesCount: TypesCount): number {
    let best = 0;
    for (const [type, count] of Object.entries(typesCount)) {
        const candidate = type === "map"
            ? getHighestRecordCount(count as TypesCountRecord)
            : type === "array"
                ? getHighestTypesCount(count as TypesCount)
                : count as number;
        if (candidate > best) {
            best = candidate;
        }
    }
    return best;
}
192
+
193
/**
 * Returns the highest count found across all the counters of a record.
 *
 * @param record record of counters, one per key
 * @returns the highest count, or 0 when the record is empty
 */
function getHighestRecordCount(record: TypesCountRecord): number {
    let best = 0;
    for (const typesCount of Object.values(record)) {
        best = Math.max(best, getHighestTypesCount(typesCount));
    }
    return best;
}
198
+
199
/**
 * Picks the data type with the highest count in a types counter.
 *
 * Nested `map` and `array` counters are weighted by the highest count found
 * inside them. On a tie the type listed first wins.
 *
 * @param typesCount counter to inspect
 * @returns the most frequent type, or `"string"` when the counter is empty
 */
function getMostProbableType(typesCount: TypesCount): DataType {
    let winner: DataType = "string"; // default
    let winnerCount = -1;
    for (const [type, count] of Object.entries(typesCount)) {
        let candidateCount: number;
        switch (type) {
            case "map":
                candidateCount = getHighestRecordCount(count as TypesCountRecord);
                break;
            case "array":
                candidateCount = getHighestTypesCount(count as TypesCount);
                break;
            default:
                candidateCount = count as number;
        }
        if (candidateCount > winnerCount) {
            winnerCount = candidateCount;
            winner = type as DataType;
        }
    }
    return winner;
}
218
+
219
/**
 * Builds the property of a single key from the collected statistics.
 *
 * - `map`: a key/value map with no properties when the nested keys vary a
 *   lot between documents, otherwise a map whose properties are built
 *   recursively.
 * - `array`: an array whose `of` property is built from the most probable
 *   type of its elements.
 * - `string` and `reference`: delegated to their dedicated builders.
 * - anything else: a bare property of that data type.
 *
 * Non-container properties also get the inferred validation, if any.
 * The property name is derived from the key with `unslugify`.
 *
 * @param key key of the property
 * @param totalDocsCount number of documents analysed
 * @param mostProbableType data type chosen for the property
 * @param typesCount types counter of the property
 * @param valuesResult values collected for the property, if any
 * @returns the inferred property
 */
function buildPropertyFromCount(key: string, totalDocsCount: number, mostProbableType: DataType, typesCount: TypesCount, valuesResult?: ValuesCountEntry): Property {
    const title: string | undefined = key ? unslugify(key) : undefined;

    if (mostProbableType === "map") {
        if (checkTypesCountHighVariability(typesCount)) {
            return {
                dataType: "map",
                name: title,
                keyValue: true,
                properties: {}
            };
        }
        const nestedValues = valuesResult ? valuesResult.mapValues : undefined;
        const properties = buildPropertiesFromCount(totalDocsCount, typesCount.map as TypesCountRecord, nestedValues);
        return {
            dataType: "map",
            name: title,
            properties
        };
    }

    if (mostProbableType === "array") {
        const elementTypesCount = typesCount.array as TypesCount;
        const elementType = getMostProbableType(elementTypesCount);
        const of = buildPropertyFromCount(key, totalDocsCount, elementType, elementTypesCount, valuesResult);
        return {
            dataType: "array",
            name: title,
            of
        };
    }

    const builderProps: InferencePropertyBuilderProps = {
        name: key,
        totalDocsCount,
        valuesResult
    };

    let inferred: Property;
    switch (mostProbableType) {
        case "string":
            inferred = buildStringProperty(builderProps);
            break;
        case "reference":
            inferred = buildReferenceProperty(builderProps);
            break;
        default:
            inferred = {
                dataType: mostProbableType
            } as Property;
    }

    if (title) {
        inferred.name = title;
    }

    const validation = buildValidation(builderProps);
    if (validation) {
        inferred.validation = validation;
    }

    return inferred;
}
281
+
282
/**
 * Builds one property per key of a record of types counters.
 *
 * @param totalDocsCount number of documents analysed
 * @param typesCountRecord types counters, one per key
 * @param valuesCountRecord collected values, one entry per key, if available
 * @returns the inferred properties, keyed like `typesCountRecord`
 */
function buildPropertiesFromCount(totalDocsCount: number, typesCountRecord: TypesCountRecord, valuesCountRecord?: ValuesCountRecord): Properties {
    const properties: Properties = {};
    for (const [key, typesCount] of Object.entries(typesCountRecord)) {
        const keyValues = valuesCountRecord ? valuesCountRecord[key] : undefined;
        properties[key] = buildPropertyFromCount(key, totalDocsCount, getMostProbableType(typesCount), typesCount, keyValues);
    }
    return properties;
}
290
+
291
/**
 * Returns the largest numeric count found anywhere under a counter,
 * descending into every nested object.
 *
 * @param typesCount counter (or record of counters) to inspect
 * @returns the largest count, or 0 when there is none
 */
function countMaxDocumentsUnder(typesCount: TypesCount) {
    let highest = 0;
    for (const value of Object.values(typesCount)) {
        const candidate = typeof value === "object"
            ? countMaxDocumentsUnder(value as TypesCountRecord)
            : value as number;
        highest = Math.max(highest, candidate);
    }
    return highest;
}
303
+
304
/**
 * Resolves the most frequent data type among the elements of an array.
 *
 * @param array elements to inspect
 * @param getType callback resolving the data type of a value
 * @returns the most probable type of the elements
 */
function getMostProbableTypeInArray(array: any[], getType: InferenceTypeBuilder): DataType {
    const elementsCount: TypesCount = {};
    for (const element of array) {
        increaseTypeCount(getType(element), elementsCount, element, getType);
    }
    return getMostProbableType(elementsCount);
}
311
+
312
/**
 * Tells whether the keys of a map vary a lot between documents.
 *
 * A nested key is considered rare when its highest count is below a third
 * of the highest count found under the whole counter. Variability is high
 * when more than half of the nested keys are rare.
 *
 * @param typesCount types counter of a map property
 * @returns `true` when more than half of the nested keys are rare
 */
function checkTypesCountHighVariability(typesCount: TypesCount) {
    const maxCount = countMaxDocumentsUnder(typesCount);
    const nestedCounters = Object.values(typesCount.map ?? {});
    const rareKeys = nestedCounters
        .filter((nestedCounter) => countMaxDocumentsUnder(nestedCounter) < maxCount / 3)
        .length;
    // with no nested keys this is 0 / 0, which never exceeds the threshold
    return rareKeys / nestedCounters.length > 0.5;
}
324
+
package/src/index.ts ADDED
@@ -0,0 +1,2 @@
1
+ export * from "./collection_builder";
2
+ export * from "./util";
package/src/strings.ts ADDED
@@ -0,0 +1,47 @@
1
+ import { ValuesCountEntry } from "./types";
2
+ import { DocumentReference } from "firebase/firestore";
3
+
4
+
5
/**
 * Looks for a path prefix shared by most of the sampled values.
 *
 * Strings are used as they are and `DocumentReference` values contribute
 * their `path`; anything else is ignored. The candidate prefix is the first
 * path containing a slash, cut at its last slash. It is returned only when
 * more than two thirds of all the sampled values start with it.
 *
 * @param valuesCount values collected for a field, if any
 * @returns the shared prefix, or `undefined` when there is none
 */
export function findCommonInitialStringInPath(valuesCount?: ValuesCountEntry) {

    if (!valuesCount) return undefined;

    const toPath = (value: any) => {
        if (typeof value === "string") return value;
        if (value instanceof DocumentReference) return value.path;
        return undefined;
    };

    const paths: string[] = [];
    for (const value of valuesCount.values) {
        const path = toPath(value);
        if (path) paths.push(path);
    }

    const firstNestedPath = paths.find((path) => path.includes("/"));
    if (!firstNestedPath) return undefined;

    const candidatePrefix = firstNestedPath.slice(0, firstNestedPath.lastIndexOf("/"));

    const matchingCount = paths.filter((path) => path.startsWith(candidatePrefix)).length;
    const sharedByMost = matchingCount > valuesCount.values.length / 3 * 2;

    return sharedByMost ? candidatePrefix : undefined;

}
32
+
33
/**
 * Strips one trailing slash and then one leading slash from a string.
 *
 * @param s string to clean up
 * @returns the string without its leading and trailing slash
 */
export function removeInitialAndTrailingSlashes(s: string): string {
    const withoutTrailingSlash = removeTrailingSlash(s);
    return removeInitialSlash(withoutTrailingSlash);
}
36
+
37
/**
 * Strips a single leading slash from a string, if present.
 *
 * @param s string to clean up
 * @returns the string without its leading slash
 */
export function removeInitialSlash(s: string) {
    return s.startsWith("/") ? s.slice(1) : s;
}
42
+
43
/**
 * Strips a single trailing slash from a string, if present.
 *
 * @param s string to clean up
 * @returns the string without its trailing slash
 */
export function removeTrailingSlash(s: string) {
    return s.endsWith("/") ? s.slice(0, -1) : s;
}
@@ -0,0 +1,28 @@
1
+ import { buildEntityPropertiesFromData } from "../collection_builder";
2
+ import { DataType } from "firecms";
3
+
4
+ import usage from "./usage.json" assert {
5
+ type: 'json',
6
+ integrity: 'sha384-ABC123'
7
+ };
8
+ import * as util from "util";
9
+
10
// Run the inference against the imported sample data and print the inferred properties.
buildEntityPropertiesFromData(usage, getType)
    .then((res) => console.log(util.inspect(res, { showHidden: false, depth: null, colors: true })))
    .catch((error: unknown) => {
        // Report the failure instead of leaving an unhandled promise rejection
        console.error(error);
        process.exitCode = 1;
    });
12
+
13
+
14
/**
 * Resolves the data type of a sampled value.
 *
 * Primitives map to `number`, `string` and `boolean`, and arrays to `array`.
 * A value exposing both `_seconds` and `_nanoseconds` is a `date`, and one
 * exposing both `id` and `path` is a `reference`. Everything else, including
 * `null` and `undefined`, is a `map`.
 *
 * @param value value to classify
 * @returns the resolved data type
 */
function getType(value: any): DataType {
    switch (typeof value) {
        case "number":
            return "number";
        case "string":
            return "string";
        case "boolean":
            return "boolean";
    }

    if (Array.isArray(value)) {
        return "array";
    }
    if (!value) {
        return "map";
    }
    if ("_seconds" in value && "_nanoseconds" in value) {
        return "date";
    }
    if ("id" in value && "path" in value) {
        return "reference";
    }
    return "map";
}