@firecms/schema_inference 3.0.0-canary.5 → 3.0.0-canary.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +113 -21
- package/dist/collection_builder.d.ts +2 -1
- package/dist/index.es.js +139 -99
- package/dist/index.es.js.map +1 -1
- package/dist/index.umd.js +1 -1
- package/dist/index.umd.js.map +1 -1
- package/package.json +7 -8
- package/src/builders/string_property_builder.ts +1 -2
- package/src/collection_builder.ts +124 -36
- package/src/strings.ts +1 -1
- package/src/test_schemas/pop_products.json +948 -0
- package/src/test_schemas/test_schema.ts +5 -1
@@ -5,8 +5,7 @@ import {
|
|
5
5
|
Properties,
|
6
6
|
Property,
|
7
7
|
resolveEnumValues,
|
8
|
-
StringProperty
|
9
|
-
unslugify
|
8
|
+
StringProperty
|
10
9
|
} from "@firecms/core";
|
11
10
|
import {
|
12
11
|
InferencePropertyBuilderProps,
|
@@ -22,7 +21,10 @@ import { extractEnumFromValues } from "./util";
|
|
22
21
|
|
23
22
|
export type InferenceTypeBuilder = (value: any) => DataType;
|
24
23
|
|
25
|
-
export async function buildEntityPropertiesFromData(
|
24
|
+
export async function buildEntityPropertiesFromData(
|
25
|
+
data: object[],
|
26
|
+
getType: InferenceTypeBuilder
|
27
|
+
): Promise<Properties> {
|
26
28
|
const typesCount: TypesCountRecord = {};
|
27
29
|
const valuesCount: ValuesCountRecord = {};
|
28
30
|
if (data) {
|
@@ -31,15 +33,18 @@ export async function buildEntityPropertiesFromData(data: object[], getType: Inf
|
|
31
33
|
Object.entries(entry).forEach(([key, value]) => {
|
32
34
|
increaseMapTypeCount(typesCount, key, value, getType);
|
33
35
|
increaseValuesCount(valuesCount, key, value, getType);
|
34
|
-
})
|
36
|
+
});
|
35
37
|
}
|
36
38
|
});
|
37
39
|
}
|
38
|
-
// console.log(util.inspect({ typesCount }, { showHidden: false, depth: null, colors: true }));
|
39
40
|
return buildPropertiesFromCount(data.length, typesCount, valuesCount);
|
40
41
|
}
|
41
42
|
|
42
|
-
export function buildPropertyFromData(
|
43
|
+
export function buildPropertyFromData(
|
44
|
+
data: any[],
|
45
|
+
property: Property,
|
46
|
+
getType: InferenceTypeBuilder
|
47
|
+
): Property {
|
43
48
|
const typesCount = {};
|
44
49
|
const valuesCount: ValuesCountRecord = {};
|
45
50
|
if (data) {
|
@@ -56,13 +61,25 @@ export function buildPropertyFromData(data: any[], property: Property, getType:
|
|
56
61
|
enumValues: [...newEnumValues, ...enumValues]
|
57
62
|
} as StringProperty;
|
58
63
|
}
|
59
|
-
const generatedProperty = buildPropertyFromCount(
|
64
|
+
const generatedProperty = buildPropertyFromCount(
|
65
|
+
"inferred_prop",
|
66
|
+
data.length,
|
67
|
+
property.dataType,
|
68
|
+
typesCount,
|
69
|
+
valuesCount["inferred_prop"]
|
70
|
+
);
|
60
71
|
return mergeDeep(generatedProperty, property);
|
61
72
|
}
|
62
73
|
|
63
|
-
export function buildPropertiesOrder(
|
74
|
+
export function buildPropertiesOrder(
|
75
|
+
properties: Properties<any>,
|
76
|
+
priorityKeys?: string[]
|
77
|
+
): string[] {
|
78
|
+
const lowerCasePriorityKeys = (priorityKeys ?? []).map((key) => key.toLowerCase());
|
79
|
+
|
64
80
|
function propOrder(s: string) {
|
65
81
|
const k = s.toLowerCase();
|
82
|
+
if (lowerCasePriorityKeys.includes(k)) return 4;
|
66
83
|
if (k === "title" || k === "name") return 3;
|
67
84
|
if (k.includes("title") || k.includes("name")) return 2;
|
68
85
|
if (k.includes("image") || k.includes("picture")) return 1;
|
@@ -83,7 +100,12 @@ export function buildPropertiesOrder(properties: Properties<any>): string [] {
|
|
83
100
|
* @param fieldValue
|
84
101
|
* @param getType
|
85
102
|
*/
|
86
|
-
function increaseTypeCount(
|
103
|
+
function increaseTypeCount(
|
104
|
+
type: DataType,
|
105
|
+
typesCount: TypesCount,
|
106
|
+
fieldValue: any,
|
107
|
+
getType: InferenceTypeBuilder
|
108
|
+
) {
|
87
109
|
if (type === "map") {
|
88
110
|
if (fieldValue) {
|
89
111
|
let mapTypesCount = typesCount[type];
|
@@ -93,7 +115,7 @@ function increaseTypeCount(type: DataType, typesCount: TypesCount, fieldValue: a
|
|
93
115
|
}
|
94
116
|
Object.entries(fieldValue).forEach(([key, value]) => {
|
95
117
|
increaseMapTypeCount(mapTypesCount as TypesCountRecord, key, value, getType);
|
96
|
-
})
|
118
|
+
});
|
97
119
|
}
|
98
120
|
} else if (type === "array") {
|
99
121
|
let arrayTypesCount = typesCount[type];
|
@@ -102,9 +124,22 @@ function increaseTypeCount(type: DataType, typesCount: TypesCount, fieldValue: a
|
|
102
124
|
typesCount[type] = arrayTypesCount;
|
103
125
|
}
|
104
126
|
if (fieldValue && Array.isArray(fieldValue) && fieldValue.length > 0) {
|
105
|
-
const arrayType = getMostProbableTypeInArray(fieldValue, getType);
|
106
|
-
if (
|
107
|
-
|
127
|
+
const arrayType = getMostProbableTypeInArray(fieldValue, getType);
|
128
|
+
if (arrayType === "map") {
|
129
|
+
let mapTypesCount = arrayTypesCount[arrayType];
|
130
|
+
if (!mapTypesCount) {
|
131
|
+
mapTypesCount = {};
|
132
|
+
}
|
133
|
+
fieldValue.forEach((value) => {
|
134
|
+
Object.entries(value).forEach(([key, v]) =>
|
135
|
+
increaseMapTypeCount(mapTypesCount, key, v, getType)
|
136
|
+
);
|
137
|
+
});
|
138
|
+
arrayTypesCount[arrayType] = mapTypesCount;
|
139
|
+
} else {
|
140
|
+
if (!arrayTypesCount[arrayType]) arrayTypesCount[arrayType] = 1;
|
141
|
+
else (arrayTypesCount[arrayType] as number)++;
|
142
|
+
}
|
108
143
|
}
|
109
144
|
} else {
|
110
145
|
if (!typesCount[type]) typesCount[type] = 1;
|
@@ -124,7 +159,8 @@ function increaseMapTypeCount(
|
|
124
159
|
typesCountRecord[key] = typesCount;
|
125
160
|
}
|
126
161
|
|
127
|
-
if (fieldValue != null) {
|
162
|
+
if (fieldValue != null) {
|
163
|
+
// Check that fieldValue is not null or undefined before proceeding
|
128
164
|
const type = getType(fieldValue);
|
129
165
|
increaseTypeCount(type, typesCount, fieldValue, getType);
|
130
166
|
}
|
@@ -136,7 +172,6 @@ function increaseValuesCount(
|
|
136
172
|
fieldValue: any,
|
137
173
|
getType: InferenceTypeBuilder
|
138
174
|
) {
|
139
|
-
|
140
175
|
const dataType = getType(fieldValue);
|
141
176
|
|
142
177
|
let valuesRecord: {
|
@@ -160,13 +195,15 @@ function increaseValuesCount(
|
|
160
195
|
valuesRecord.map = mapValuesRecord;
|
161
196
|
}
|
162
197
|
if (fieldValue)
|
163
|
-
Object.entries(fieldValue).forEach(([key, value]) =>
|
198
|
+
Object.entries(fieldValue).forEach(([key, value]) =>
|
199
|
+
increaseValuesCount(mapValuesRecord as ValuesCountRecord, key, value, getType)
|
200
|
+
);
|
164
201
|
} else if (dataType === "array") {
|
165
202
|
if (Array.isArray(fieldValue)) {
|
166
203
|
fieldValue.forEach((value) => {
|
167
204
|
valuesRecord.values.push(value);
|
168
205
|
valuesRecord.valuesCount.set(value, (valuesRecord.valuesCount.get(value) ?? 0) + 1);
|
169
|
-
})
|
206
|
+
});
|
170
207
|
}
|
171
208
|
} else {
|
172
209
|
if (fieldValue) {
|
@@ -174,7 +211,6 @@ function increaseValuesCount(
|
|
174
211
|
valuesRecord.valuesCount.set(fieldValue, (valuesRecord.valuesCount.get(fieldValue) ?? 0) + 1);
|
175
212
|
}
|
176
213
|
}
|
177
|
-
|
178
214
|
}
|
179
215
|
|
180
216
|
function getHighestTypesCount(typesCount: TypesCount): number {
|
@@ -204,7 +240,7 @@ function getHighestRecordCount(record: TypesCountRecord): number {
|
|
204
240
|
|
205
241
|
function getMostProbableType(typesCount: TypesCount): DataType {
|
206
242
|
let highestCount = -1;
|
207
|
-
let probableType: DataType = "string"; //default
|
243
|
+
let probableType: DataType = "string"; // default
|
208
244
|
Object.entries(typesCount).forEach(([type, count]) => {
|
209
245
|
let countValue;
|
210
246
|
if (type === "map") {
|
@@ -222,16 +258,21 @@ function getMostProbableType(typesCount: TypesCount): DataType {
|
|
222
258
|
return probableType;
|
223
259
|
}
|
224
260
|
|
225
|
-
function buildPropertyFromCount(
|
261
|
+
function buildPropertyFromCount(
|
262
|
+
key: string,
|
263
|
+
totalDocsCount: number,
|
264
|
+
mostProbableType: DataType,
|
265
|
+
typesCount: TypesCount,
|
266
|
+
valuesResult?: ValuesCountEntry
|
267
|
+
): Property {
|
226
268
|
let title: string | undefined;
|
227
269
|
|
228
270
|
if (key) {
|
229
|
-
title =
|
271
|
+
title = formatString(key.toLowerCase());
|
230
272
|
}
|
231
273
|
|
232
274
|
let result: Property | undefined = undefined;
|
233
275
|
if (mostProbableType === "map") {
|
234
|
-
|
235
276
|
const highVariability = checkTypesCountHighVariability(typesCount);
|
236
277
|
if (highVariability) {
|
237
278
|
result = {
|
@@ -241,7 +282,11 @@ function buildPropertyFromCount(key: string, totalDocsCount: number, mostProbabl
|
|
241
282
|
properties: {}
|
242
283
|
};
|
243
284
|
}
|
244
|
-
const properties = buildPropertiesFromCount(
|
285
|
+
const properties = buildPropertiesFromCount(
|
286
|
+
totalDocsCount,
|
287
|
+
typesCount.map as TypesCountRecord,
|
288
|
+
valuesResult ? valuesResult.mapValues : undefined
|
289
|
+
);
|
245
290
|
result = {
|
246
291
|
dataType: "map",
|
247
292
|
name: title,
|
@@ -250,13 +295,20 @@ function buildPropertyFromCount(key: string, totalDocsCount: number, mostProbabl
|
|
250
295
|
} else if (mostProbableType === "array") {
|
251
296
|
const arrayTypesCount = typesCount.array as TypesCount;
|
252
297
|
const arrayMostProbableType = getMostProbableType(arrayTypesCount);
|
253
|
-
const of = buildPropertyFromCount(
|
298
|
+
const of = buildPropertyFromCount(
|
299
|
+
key,
|
300
|
+
totalDocsCount,
|
301
|
+
arrayMostProbableType,
|
302
|
+
arrayTypesCount,
|
303
|
+
valuesResult
|
304
|
+
);
|
254
305
|
result = {
|
255
306
|
dataType: "array",
|
256
307
|
name: title,
|
257
308
|
of
|
258
309
|
};
|
259
310
|
}
|
311
|
+
|
260
312
|
if (!result) {
|
261
313
|
const propertyProps: InferencePropertyBuilderProps = {
|
262
314
|
name: key,
|
@@ -289,19 +341,28 @@ function buildPropertyFromCount(key: string, totalDocsCount: number, mostProbabl
|
|
289
341
|
};
|
290
342
|
}
|
291
343
|
|
292
|
-
function buildPropertiesFromCount(
|
344
|
+
/**
 * Build one property per key of `typesCountRecord`.
 *
 * For each key, the most probable type is picked from that key's type counts
 * and passed to `buildPropertyFromCount` together with the counts themselves
 * and, when a values record was supplied, the values entry stored under the
 * same key.
 *
 * @param totalDocsCount forwarded unchanged to `buildPropertyFromCount`
 * @param typesCountRecord type counts, indexed by property key
 * @param valuesCountRecord optional values counts, indexed by property key
 * @returns an object mapping every key of `typesCountRecord` to its property
 */
function buildPropertiesFromCount(
    totalDocsCount: number,
    typesCountRecord: TypesCountRecord,
    valuesCountRecord?: ValuesCountRecord
): Properties {
    const properties: Properties = {};
    for (const [propertyKey, propertyTypesCount] of Object.entries(typesCountRecord)) {
        const probableType = getMostProbableType(propertyTypesCount);
        // Values are optional: without a record every property is built without them.
        const propertyValues = valuesCountRecord ? valuesCountRecord[propertyKey] : undefined;
        properties[propertyKey] = buildPropertyFromCount(
            propertyKey,
            totalDocsCount,
            probableType,
            propertyTypesCount,
            propertyValues
        );
    }
    return properties;
}
|
300
362
|
|
301
363
|
function countMaxDocumentsUnder(typesCount: TypesCount) {
|
302
364
|
let count = 0;
|
303
365
|
Object.entries(typesCount).forEach(([type, value]) => {
|
304
|
-
// console.log(util.inspect({ type, value }, { showHidden: false, depth: null, colors: true }));
|
305
366
|
if (typeof value === "object") {
|
306
367
|
count = Math.max(count, countMaxDocumentsUnder(value as TypesCountRecord));
|
307
368
|
} else {
|
@@ -311,7 +372,10 @@ function countMaxDocumentsUnder(typesCount: TypesCount) {
|
|
311
372
|
return count;
|
312
373
|
}
|
313
374
|
|
314
|
-
function getMostProbableTypeInArray(
|
375
|
+
function getMostProbableTypeInArray(
|
376
|
+
array: any[],
|
377
|
+
getType: InferenceTypeBuilder
|
378
|
+
): DataType {
|
315
379
|
let typesCount: TypesCount = {};
|
316
380
|
array.forEach((value) => {
|
317
381
|
increaseTypeCount(getType(value), typesCount, value, getType);
|
@@ -322,13 +386,37 @@ function getMostProbableTypeInArray(array: any[], getType: InferenceTypeBuilder)
|
|
322
386
|
/**
 * Report whether most keys under `typesCount.map` are sparsely populated.
 *
 * A key is "sparse" when `countMaxDocumentsUnder` for that key is below one
 * third of `countMaxDocumentsUnder` for the whole `typesCount`. The result is
 * true when more than half of the map keys are sparse.
 *
 * With no map keys the ratio is 0 / 0 (NaN), and the comparison yields false.
 *
 * @param typesCount type counts whose `map` entry is inspected
 * @returns true when the sparse keys make up more than 50% of the map keys
 */
function checkTypesCountHighVariability(typesCount: TypesCount) {
    const mapEntries = Object.entries(typesCount.map ?? {});
    const sparseThreshold = countMaxDocumentsUnder(typesCount) / 3;
    const sparseKeyCount = mapEntries
        .filter(([, nestedCount]) => countMaxDocumentsUnder(nestedCount) < sparseThreshold)
        .length;
    return sparseKeyCount / mapEntries.length > 0.5;
}
|
334
397
|
|
398
|
+
/**
 * Turn a raw key into a human-readable title.
 *
 * Runs of underscores and hyphens are treated as word separators, and a
 * lower-case letter directly followed by an upper-case letter is split into
 * two words (camelCase). Every word is then lower-cased and given a capital
 * first letter, and the words are joined with single spaces.
 *
 * Leading, trailing and repeated separators produce no empty words, so
 * `_id` becomes `Id` (not ` Id`) and `a  b` becomes `A B`.
 *
 * @param input the raw key to format
 * @returns the formatted title, or an empty string when `input` contains no words
 */
function formatString(input: string): string {
    return input
        .replace(/[_\-]+/g, " ")
        .replace(/([a-z])([A-Z])/g, "$1 $2")
        .toLowerCase()
        // Split on whitespace runs and discard empty tokens; a plain split(" ")
        // would keep them and leak stray spaces into the title.
        .split(/\s+/)
        .filter((word) => word.length > 0)
        .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
        .join(" ");
}
|
414
|
+
|
415
|
+
/**
 * Infer a data type from a raw value using only JavaScript runtime checks.
 *
 * - strings, numbers and booleans map to "string", "number" and "boolean"
 * - arrays map to "array"
 * - any other non-null object maps to "map"
 * - everything else, including `null` and `undefined`, falls back to "string"
 *
 * @param value the value to classify
 * @returns the inferred data type
 */
export function inferTypeFromValue(value: any): DataType {
    // `typeof null` is "object"; handle nullish values first so that a missing
    // value is not reported as a map.
    if (value === null || value === undefined) return "string";

    switch (typeof value) {
        case "string":
            return "string";
        case "number":
            return "number";
        case "boolean":
            return "boolean";
        case "object":
            // Arrays are objects too, so they must be told apart explicitly.
            return Array.isArray(value) ? "array" : "map";
        default:
            return "string";
    }
}
|
package/src/strings.ts
CHANGED