@firecms/schema_inference 3.0.0-canary.8 → 3.0.0-canary.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +113 -21
- package/dist/collection_builder.d.ts +3 -2
- package/dist/index.es.js +141 -103
- package/dist/index.es.js.map +1 -1
- package/dist/index.umd.js +1 -1
- package/dist/index.umd.js.map +1 -1
- package/package.json +7 -8
- package/src/builders/string_property_builder.ts +1 -2
- package/src/collection_builder.ts +127 -38
- package/src/strings.ts +1 -1
- package/src/test_schemas/pop_products.json +948 -0
- package/src/test_schemas/test_schema.ts +5 -1
@@ -2,11 +2,10 @@ import {
|
|
2
2
|
DataType,
|
3
3
|
EnumValues,
|
4
4
|
mergeDeep,
|
5
|
-
Properties,
|
5
|
+
Properties, PropertiesOrBuilders,
|
6
6
|
Property,
|
7
7
|
resolveEnumValues,
|
8
|
-
StringProperty
|
9
|
-
unslugify
|
8
|
+
StringProperty
|
10
9
|
} from "@firecms/core";
|
11
10
|
import {
|
12
11
|
InferencePropertyBuilderProps,
|
@@ -22,7 +21,10 @@ import { extractEnumFromValues } from "./util";
|
|
22
21
|
|
23
22
|
export type InferenceTypeBuilder = (value: any) => DataType;
|
24
23
|
|
25
|
-
export async function buildEntityPropertiesFromData(
|
24
|
+
export async function buildEntityPropertiesFromData(
|
25
|
+
data: object[],
|
26
|
+
getType: InferenceTypeBuilder
|
27
|
+
): Promise<Properties> {
|
26
28
|
const typesCount: TypesCountRecord = {};
|
27
29
|
const valuesCount: ValuesCountRecord = {};
|
28
30
|
if (data) {
|
@@ -31,15 +33,18 @@ export async function buildEntityPropertiesFromData(data: object[], getType: Inf
|
|
31
33
|
Object.entries(entry).forEach(([key, value]) => {
|
32
34
|
increaseMapTypeCount(typesCount, key, value, getType);
|
33
35
|
increaseValuesCount(valuesCount, key, value, getType);
|
34
|
-
})
|
36
|
+
});
|
35
37
|
}
|
36
38
|
});
|
37
39
|
}
|
38
|
-
// console.log(util.inspect({ typesCount }, { showHidden: false, depth: null, colors: true }));
|
39
40
|
return buildPropertiesFromCount(data.length, typesCount, valuesCount);
|
40
41
|
}
|
41
42
|
|
42
|
-
export function buildPropertyFromData(
|
43
|
+
export function buildPropertyFromData(
|
44
|
+
data: any[],
|
45
|
+
property: Property,
|
46
|
+
getType: InferenceTypeBuilder
|
47
|
+
): Property {
|
43
48
|
const typesCount = {};
|
44
49
|
const valuesCount: ValuesCountRecord = {};
|
45
50
|
if (data) {
|
@@ -56,20 +61,33 @@ export function buildPropertyFromData(data: any[], property: Property, getType:
|
|
56
61
|
enumValues: [...newEnumValues, ...enumValues]
|
57
62
|
} as StringProperty;
|
58
63
|
}
|
59
|
-
const generatedProperty = buildPropertyFromCount(
|
64
|
+
const generatedProperty = buildPropertyFromCount(
|
65
|
+
"inferred_prop",
|
66
|
+
data.length,
|
67
|
+
property.dataType,
|
68
|
+
typesCount,
|
69
|
+
valuesCount["inferred_prop"]
|
70
|
+
);
|
60
71
|
return mergeDeep(generatedProperty, property);
|
61
72
|
}
|
62
73
|
|
63
|
-
export function buildPropertiesOrder(
|
74
|
+
export function buildPropertiesOrder(
|
75
|
+
properties: PropertiesOrBuilders,
|
76
|
+
propertiesOrder?: string[],
|
77
|
+
priorityKeys?: string[]
|
78
|
+
): string[] {
|
79
|
+
const lowerCasePriorityKeys = (priorityKeys ?? []).map((key) => key.toLowerCase());
|
80
|
+
|
64
81
|
function propOrder(s: string) {
|
65
82
|
const k = s.toLowerCase();
|
83
|
+
if (lowerCasePriorityKeys.includes(k)) return 4;
|
66
84
|
if (k === "title" || k === "name") return 3;
|
67
85
|
if (k.includes("title") || k.includes("name")) return 2;
|
68
86
|
if (k.includes("image") || k.includes("picture")) return 1;
|
69
87
|
return 0;
|
70
88
|
}
|
71
89
|
|
72
|
-
const keys = Object.keys(properties);
|
90
|
+
const keys = propertiesOrder ?? Object.keys(properties);
|
73
91
|
keys.sort(); // alphabetically
|
74
92
|
keys.sort((a, b) => {
|
75
93
|
return propOrder(b) - propOrder(a);
|
@@ -83,7 +101,12 @@ export function buildPropertiesOrder(properties: Properties<any>): string [] {
|
|
83
101
|
* @param fieldValue
|
84
102
|
* @param getType
|
85
103
|
*/
|
86
|
-
function increaseTypeCount(
|
104
|
+
function increaseTypeCount(
|
105
|
+
type: DataType,
|
106
|
+
typesCount: TypesCount,
|
107
|
+
fieldValue: any,
|
108
|
+
getType: InferenceTypeBuilder
|
109
|
+
) {
|
87
110
|
if (type === "map") {
|
88
111
|
if (fieldValue) {
|
89
112
|
let mapTypesCount = typesCount[type];
|
@@ -93,7 +116,7 @@ function increaseTypeCount(type: DataType, typesCount: TypesCount, fieldValue: a
|
|
93
116
|
}
|
94
117
|
Object.entries(fieldValue).forEach(([key, value]) => {
|
95
118
|
increaseMapTypeCount(mapTypesCount as TypesCountRecord, key, value, getType);
|
96
|
-
})
|
119
|
+
});
|
97
120
|
}
|
98
121
|
} else if (type === "array") {
|
99
122
|
let arrayTypesCount = typesCount[type];
|
@@ -102,9 +125,22 @@ function increaseTypeCount(type: DataType, typesCount: TypesCount, fieldValue: a
|
|
102
125
|
typesCount[type] = arrayTypesCount;
|
103
126
|
}
|
104
127
|
if (fieldValue && Array.isArray(fieldValue) && fieldValue.length > 0) {
|
105
|
-
const arrayType = getMostProbableTypeInArray(fieldValue, getType);
|
106
|
-
if (
|
107
|
-
|
128
|
+
const arrayType = getMostProbableTypeInArray(fieldValue, getType);
|
129
|
+
if (arrayType === "map") {
|
130
|
+
let mapTypesCount = arrayTypesCount[arrayType];
|
131
|
+
if (!mapTypesCount) {
|
132
|
+
mapTypesCount = {};
|
133
|
+
}
|
134
|
+
fieldValue.forEach((value) => {
|
135
|
+
Object.entries(value).forEach(([key, v]) =>
|
136
|
+
increaseMapTypeCount(mapTypesCount, key, v, getType)
|
137
|
+
);
|
138
|
+
});
|
139
|
+
arrayTypesCount[arrayType] = mapTypesCount;
|
140
|
+
} else {
|
141
|
+
if (!arrayTypesCount[arrayType]) arrayTypesCount[arrayType] = 1;
|
142
|
+
else (arrayTypesCount[arrayType] as number)++;
|
143
|
+
}
|
108
144
|
}
|
109
145
|
} else {
|
110
146
|
if (!typesCount[type]) typesCount[type] = 1;
|
@@ -124,7 +160,8 @@ function increaseMapTypeCount(
|
|
124
160
|
typesCountRecord[key] = typesCount;
|
125
161
|
}
|
126
162
|
|
127
|
-
if (fieldValue != null) {
|
163
|
+
if (fieldValue != null) {
|
164
|
+
// Check that fieldValue is not null or undefined before proceeding
|
128
165
|
const type = getType(fieldValue);
|
129
166
|
increaseTypeCount(type, typesCount, fieldValue, getType);
|
130
167
|
}
|
@@ -136,7 +173,6 @@ function increaseValuesCount(
|
|
136
173
|
fieldValue: any,
|
137
174
|
getType: InferenceTypeBuilder
|
138
175
|
) {
|
139
|
-
|
140
176
|
const dataType = getType(fieldValue);
|
141
177
|
|
142
178
|
let valuesRecord: {
|
@@ -160,13 +196,15 @@ function increaseValuesCount(
|
|
160
196
|
valuesRecord.map = mapValuesRecord;
|
161
197
|
}
|
162
198
|
if (fieldValue)
|
163
|
-
Object.entries(fieldValue).forEach(([key, value]) =>
|
199
|
+
Object.entries(fieldValue).forEach(([key, value]) =>
|
200
|
+
increaseValuesCount(mapValuesRecord as ValuesCountRecord, key, value, getType)
|
201
|
+
);
|
164
202
|
} else if (dataType === "array") {
|
165
203
|
if (Array.isArray(fieldValue)) {
|
166
204
|
fieldValue.forEach((value) => {
|
167
205
|
valuesRecord.values.push(value);
|
168
206
|
valuesRecord.valuesCount.set(value, (valuesRecord.valuesCount.get(value) ?? 0) + 1);
|
169
|
-
})
|
207
|
+
});
|
170
208
|
}
|
171
209
|
} else {
|
172
210
|
if (fieldValue) {
|
@@ -174,7 +212,6 @@ function increaseValuesCount(
|
|
174
212
|
valuesRecord.valuesCount.set(fieldValue, (valuesRecord.valuesCount.get(fieldValue) ?? 0) + 1);
|
175
213
|
}
|
176
214
|
}
|
177
|
-
|
178
215
|
}
|
179
216
|
|
180
217
|
function getHighestTypesCount(typesCount: TypesCount): number {
|
@@ -204,7 +241,7 @@ function getHighestRecordCount(record: TypesCountRecord): number {
|
|
204
241
|
|
205
242
|
function getMostProbableType(typesCount: TypesCount): DataType {
|
206
243
|
let highestCount = -1;
|
207
|
-
let probableType: DataType = "string"; //default
|
244
|
+
let probableType: DataType = "string"; // default
|
208
245
|
Object.entries(typesCount).forEach(([type, count]) => {
|
209
246
|
let countValue;
|
210
247
|
if (type === "map") {
|
@@ -222,16 +259,21 @@ function getMostProbableType(typesCount: TypesCount): DataType {
|
|
222
259
|
return probableType;
|
223
260
|
}
|
224
261
|
|
225
|
-
function buildPropertyFromCount(
|
262
|
+
function buildPropertyFromCount(
|
263
|
+
key: string,
|
264
|
+
totalDocsCount: number,
|
265
|
+
mostProbableType: DataType,
|
266
|
+
typesCount: TypesCount,
|
267
|
+
valuesResult?: ValuesCountEntry
|
268
|
+
): Property {
|
226
269
|
let title: string | undefined;
|
227
270
|
|
228
271
|
if (key) {
|
229
|
-
title =
|
272
|
+
title = formatString(key.toLowerCase());
|
230
273
|
}
|
231
274
|
|
232
275
|
let result: Property | undefined = undefined;
|
233
276
|
if (mostProbableType === "map") {
|
234
|
-
|
235
277
|
const highVariability = checkTypesCountHighVariability(typesCount);
|
236
278
|
if (highVariability) {
|
237
279
|
result = {
|
@@ -241,7 +283,11 @@ function buildPropertyFromCount(key: string, totalDocsCount: number, mostProbabl
|
|
241
283
|
properties: {}
|
242
284
|
};
|
243
285
|
}
|
244
|
-
const properties = buildPropertiesFromCount(
|
286
|
+
const properties = buildPropertiesFromCount(
|
287
|
+
totalDocsCount,
|
288
|
+
typesCount.map as TypesCountRecord,
|
289
|
+
valuesResult ? valuesResult.mapValues : undefined
|
290
|
+
);
|
245
291
|
result = {
|
246
292
|
dataType: "map",
|
247
293
|
name: title,
|
@@ -250,13 +296,20 @@ function buildPropertyFromCount(key: string, totalDocsCount: number, mostProbabl
|
|
250
296
|
} else if (mostProbableType === "array") {
|
251
297
|
const arrayTypesCount = typesCount.array as TypesCount;
|
252
298
|
const arrayMostProbableType = getMostProbableType(arrayTypesCount);
|
253
|
-
const of = buildPropertyFromCount(
|
299
|
+
const of = buildPropertyFromCount(
|
300
|
+
key,
|
301
|
+
totalDocsCount,
|
302
|
+
arrayMostProbableType,
|
303
|
+
arrayTypesCount,
|
304
|
+
valuesResult
|
305
|
+
);
|
254
306
|
result = {
|
255
307
|
dataType: "array",
|
256
308
|
name: title,
|
257
309
|
of
|
258
310
|
};
|
259
311
|
}
|
312
|
+
|
260
313
|
if (!result) {
|
261
314
|
const propertyProps: InferencePropertyBuilderProps = {
|
262
315
|
name: key,
|
@@ -289,19 +342,28 @@ function buildPropertyFromCount(key: string, totalDocsCount: number, mostProbabl
|
|
289
342
|
};
|
290
343
|
}
|
291
344
|
|
292
|
-
function buildPropertiesFromCount(
|
345
|
+
/**
 * Build one inferred property for every key of a types-count record.
 *
 * For each key the most probable data type is resolved with
 * `getMostProbableType` and handed, together with the raw counts, to
 * `buildPropertyFromCount`.
 *
 * @param totalDocsCount Forwarded unchanged to `buildPropertyFromCount`.
 * @param typesCountRecord Per-key type counts; its keys become the keys of the result.
 * @param valuesCountRecord Optional per-key value counts; the entry stored under
 * the same key (or `undefined` when the record is absent) is forwarded.
 * @returns A `Properties` object with one entry per key of `typesCountRecord`.
 */
function buildPropertiesFromCount(
    totalDocsCount: number,
    typesCountRecord: TypesCountRecord,
    valuesCountRecord?: ValuesCountRecord
): Properties {
    const properties: Properties = {};
    for (const [key, typesCount] of Object.entries(typesCountRecord)) {
        const probableType = getMostProbableType(typesCount);
        const valuesEntry = valuesCountRecord ? valuesCountRecord[key] : undefined;
        properties[key] = buildPropertyFromCount(
            key,
            totalDocsCount,
            probableType,
            typesCount,
            valuesEntry
        );
    }
    return properties;
}
|
300
363
|
|
301
364
|
function countMaxDocumentsUnder(typesCount: TypesCount) {
|
302
365
|
let count = 0;
|
303
366
|
Object.entries(typesCount).forEach(([type, value]) => {
|
304
|
-
// console.log(util.inspect({ type, value }, { showHidden: false, depth: null, colors: true }));
|
305
367
|
if (typeof value === "object") {
|
306
368
|
count = Math.max(count, countMaxDocumentsUnder(value as TypesCountRecord));
|
307
369
|
} else {
|
@@ -311,7 +373,10 @@ function countMaxDocumentsUnder(typesCount: TypesCount) {
|
|
311
373
|
return count;
|
312
374
|
}
|
313
375
|
|
314
|
-
function getMostProbableTypeInArray(
|
376
|
+
function getMostProbableTypeInArray(
|
377
|
+
array: any[],
|
378
|
+
getType: InferenceTypeBuilder
|
379
|
+
): DataType {
|
315
380
|
let typesCount: TypesCount = {};
|
316
381
|
array.forEach((value) => {
|
317
382
|
increaseTypeCount(getType(value), typesCount, value, getType);
|
@@ -322,13 +387,37 @@ function getMostProbableTypeInArray(array: any[], getType: InferenceTypeBuilder)
|
|
322
387
|
/**
 * Decide whether the keys found under `typesCount.map` vary a lot.
 *
 * A key counts as "sparse" when `countMaxDocumentsUnder` for that key is below
 * one third of `countMaxDocumentsUnder` for the whole `typesCount`.
 *
 * @param typesCount Type counts to inspect; a missing `map` entry is treated as
 * an empty object.
 * @returns `true` when more than half of the map keys are sparse. With no map
 * keys the ratio is `0 / 0` (NaN), so the result is `false`.
 */
function checkTypesCountHighVariability(typesCount: TypesCount) {
    const sparseThreshold = countMaxDocumentsUnder(typesCount) / 3;
    const mapEntries = Object.entries(typesCount.map ?? {});
    const sparseEntries = mapEntries.filter(
        ([, value]) => countMaxDocumentsUnder(value) < sparseThreshold
    );
    return sparseEntries.length / mapEntries.length > 0.5;
}
|
334
398
|
|
399
|
+
/**
 * Turn an identifier-like key into a human-readable, title-cased label.
 *
 * Runs of underscores and hyphens become word breaks, a lower-to-upper
 * camelCase boundary is split into two words, everything is lower-cased and
 * the first letter of each word is then capitalized.
 *
 * Leading, trailing and repeated separators are ignored, so `"_id"` yields
 * `"Id"` rather than a label padded with stray spaces.
 *
 * @param input Raw key, e.g. `"first_name"`, `"firstName"` or `"_id"`.
 * @returns The formatted label, e.g. `"First Name"`; an empty string when the
 * input holds nothing but separators or whitespace.
 */
function formatString(input: string): string {
    const normalized = input
        .replace(/[_-]+/g, " ")
        .replace(/([a-z])([A-Z])/g, "$1 $2")
        .toLowerCase()
        // Without trimming, a leading/trailing separator produces an empty word.
        .trim();

    return normalized
        // Split on whitespace runs so consecutive separators never yield empty words.
        .split(/\s+/)
        .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
        .join(" ");
}
|
415
|
+
|
416
|
+
/**
 * Infer a data type from a plain JavaScript value using `typeof` and
 * `Array.isArray`.
 *
 * - arrays map to `"array"`
 * - numbers (including `NaN`) map to `"number"`
 * - booleans map to `"boolean"`
 * - every other value whose `typeof` is `"object"` maps to `"map"`; this
 *   includes `null` and class instances such as `Date`
 * - anything else (strings, `undefined`, functions, symbols, bigints) maps to
 *   `"string"`
 *
 * @param value Value to classify.
 * @returns The inferred data type.
 */
export function inferTypeFromValue(value: any): DataType {
    // Arrays report typeof "object", so they must be recognised first.
    if (Array.isArray(value)) return "array";

    switch (typeof value) {
        case "number":
            return "number";
        case "boolean":
            return "boolean";
        case "object":
            return "map";
        default:
            return "string";
    }
}
|
package/src/strings.ts
CHANGED