@rebasepro/schema-inference 0.0.1-canary.4d4fb3e

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ import { ValuesCountEntry } from "./types";
2
+ /**
3
+ * Parse a reference string value which can be in the format:
4
+ * - Simple: "path/entityId"
5
+ * - With database: "database_name:::path/entityId"
6
+ * Returns the path and database (undefined if not specified or if "(default)")
7
+ */
8
+ export declare function parseReferenceString(value: string): {
9
+ path: string;
10
+ database?: string;
11
+ } | null;
12
+ /**
13
+ * Check if a string value looks like a reference
14
+ */
15
+ export declare function looksLikeReference(value: any): boolean;
16
+ export declare function findCommonInitialStringInPath(valuesCount?: ValuesCountEntry): string | undefined;
17
+ export declare function removeInitialAndTrailingSlashes(s: string): string;
18
+ export declare function removeInitialSlash(s: string): string;
19
+ export declare function removeTrailingSlash(s: string): string;
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,35 @@
1
+ import { DataType } from "@rebasepro/types";
2
+ export type TypesCount = {
3
+ number?: number;
4
+ string?: number;
5
+ boolean?: number;
6
+ map?: TypesCountRecord;
7
+ array?: TypesCount;
8
+ date?: number;
9
+ geopoint?: number;
10
+ reference?: number;
11
+ relation?: number;
12
+ };
13
+ export type TypesCountRecord<K extends keyof DataType = any> = {
14
+ [P in K]: TypesCount;
15
+ };
16
+ export type ValuesCountEntry = {
17
+ values: any[];
18
+ valuesCount: Map<any, number>;
19
+ mapValues?: ValuesCountRecord;
20
+ };
21
+ export type ValuesCountRecord = Record<string, ValuesCountEntry>;
22
+ export type InferencePropertyBuilderProps = {
23
+ /**
24
+ * Name of the property
25
+ */
26
+ name: string;
27
+ /**
28
+ * Total number of documents these props are built from
29
+ */
30
+ totalDocsCount: number;
31
+ /**
32
+ * How many times each value shows up
33
+ */
34
+ valuesResult?: ValuesCountEntry;
35
+ };
package/dist/util.d.ts ADDED
@@ -0,0 +1,11 @@
1
+ import { EnumValueConfig, EnumValues } from "@rebasepro/types";
2
+ export declare function extractEnumFromValues(values: unknown[]): {
3
+ id: string;
4
+ label: string;
5
+ }[];
6
+ export declare function prettifyIdentifier(input: string): string;
7
+ export declare function unslugify(slug?: string): string;
8
+ export declare function resolveEnumValues(input: EnumValues): EnumValueConfig[] | undefined;
9
+ export declare function mergeDeep<T extends Record<any, any>, U extends Record<any, any>>(target: T, source: U, ignoreUndefined?: boolean): T & U;
10
+ export declare function isObject(item: any): any;
11
+ export declare function isPlainObject(obj: any): boolean;
package/package.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "name": "@rebasepro/schema-inference",
3
+ "version": "0.0.1-canary.4d4fb3e",
4
+ "type": "module",
5
+ "publishConfig": {
6
+ "access": "public"
7
+ },
8
+ "main": "./dist/index.umd.cjs",
9
+ "module": "./dist/index.es.js",
10
+ "types": "dist/index.d.ts",
11
+ "source": "src/index.ts",
12
+ "devDependencies": {
13
+ "@rebasepro/types": "0.0.1-canary.4d4fb3e",
14
+ "@types/node": "^20.17.14",
15
+ "typescript": "^5.9.3",
16
+ "vite": "^7.2.4"
17
+ },
18
+ "exports": {
19
+ ".": {
20
+ "types": "./dist/index.d.ts",
21
+ "development": "./src/index.ts",
22
+ "import": "./dist/index.es.js",
23
+ "require": "./dist/index.umd.cjs"
24
+ },
25
+ "./package.json": "./package.json"
26
+ },
27
+ "files": [
28
+ "dist",
29
+ "src"
30
+ ],
31
+ "scripts": {
32
+ "dev": "vite",
33
+ "build": "vite build && tsc --emitDeclarationOnly -p tsconfig.prod.json",
34
+ "clean": "rm -rf dist && find ./src -name '*.js' -type f | xargs rm -f"
35
+ },
36
+ "gitHead": "71bcef3c51a458cd054f7924cc18efbbe515dcc8"
37
+ }
@@ -0,0 +1,18 @@
1
+ import { findCommonInitialStringInPath } from "../strings";
2
+ import { InferencePropertyBuilderProps } from "../types";
3
+ import { Property } from "@rebasepro/types";
4
+
5
/**
 * Builds a `reference` property for an inferred field.
 *
 * The target `path` is whatever `findCommonInitialStringInPath` derives from
 * the collected values. When it returns nothing, the visible placeholder
 * `"!!!FIX_ME!!!"` is used so the generated schema can be corrected by hand.
 *
 * @param props Inference input; only `name` and `valuesResult` are read.
 * @returns The inferred reference property.
 */
export function buildReferenceProperty(props: InferencePropertyBuilderProps): Property {
    const { name, valuesResult } = props;
    const commonPath = findCommonInitialStringInPath(valuesResult);

    const referenceProperty: Property = {
        name: name ?? "",
        type: "reference",
        path: commonPath ?? "!!!FIX_ME!!!"
    };
    return referenceProperty;
}
@@ -0,0 +1,115 @@
1
+ import { findCommonInitialStringInPath } from "../strings";
2
+ import { extractEnumFromValues } from "../util";
3
+ import { FileType, Property, StringProperty } from "@rebasepro/types";
4
+ import { InferencePropertyBuilderProps, ValuesCountEntry } from "../types";
5
+
6
/** File name suffixes that mark a string value as an image. */
const IMAGE_EXTENSIONS = [
    ".jpg",
    ".jpeg",
    ".png",
    ".webp",
    ".gif",
    ".avif"
];

/** File name suffixes that mark a string value as an audio file. */
const AUDIO_EXTENSIONS = [
    ".mp3",
    ".ogg",
    ".opus",
    ".aac"
];

/** File name suffixes that mark a string value as a video file. */
const VIDEO_EXTENSIONS = [
    ".avi",
    ".mp4"
];

/** Pattern used to decide whether a whole string value is an email address. */
const emailRegEx = /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/;
11
+
12
/**
 * Infers a string property definition from the values collected for a field.
 *
 * A value pattern counts as dominant when it matches more than two thirds of
 * `totalDocsCount`:
 * - values starting with `"http"` set `url`;
 * - values matching `emailRegEx` set `email`;
 * - 28-character values without spaces set `readOnly`.
 *
 * When none of those applies, the field becomes an enum if the number of
 * distinct values is below a third of the collected entries and more than one
 * enum value is extracted. Otherwise the values are checked for media file
 * extensions and, if detected, a `storage` config is attached.
 *
 * @returns A plain `{ name, type: "string" }` property when nothing is inferred.
 */
export function buildStringProperty({
    name,
    totalDocsCount,
    valuesResult
}: InferencePropertyBuilderProps): Property {

    const baseProperty: Property = {
        name: name ?? "",
        type: "string"
    };

    if (!valuesResult) {
        return baseProperty;
    }

    const collectedValues = valuesResult.values;
    const totalEntriesCount = collectedValues.length;
    const distinctValues = Array.from(valuesResult.valuesCount.keys());
    const dominanceThreshold = totalDocsCount / 3 * 2;

    // Counts the string values accepted by the given predicate
    const countStrings = (accepts: (text: string) => boolean): number =>
        collectedValues.filter((value) => typeof value === "string" && accepts(value)).length;

    const config: Partial<StringProperty> = {};

    const probablyAURL = countStrings((text) => text.startsWith("http")) > dominanceThreshold;
    if (probablyAURL) {
        config.url = true;
    }

    const probablyAnEmail = countStrings((text) => emailRegEx.test(text)) > dominanceThreshold;
    if (probablyAnEmail) {
        config.email = true;
    }

    const probablyUserIds = countStrings((text) => text.length === 28 && !text.includes(" ")) > dominanceThreshold;
    if (probablyUserIds) {
        config.readOnly = true;
    }

    const hasKnownFormat = probablyAnEmail || probablyAURL || probablyUserIds;

    if (!hasKnownFormat && distinctValues.length < totalEntriesCount / 3) {
        const enumValues = extractEnumFromValues(distinctValues);
        if (Object.keys(enumValues).length > 1) {
            config.enum = enumValues;
        }
    }

    // Regular string: check whether the values look like stored media files
    if (!hasKnownFormat && !config.enum) {
        const fileType = probableFileType(valuesResult, totalDocsCount);
        if (fileType) {
            config.storage = {
                acceptedFiles: fileType as FileType[],
                storagePath: findCommonInitialStringInPath(valuesResult) ?? "/"
            };
        }
    }

    if (Object.keys(config).length === 0) {
        return baseProperty;
    }

    return {
        ...baseProperty,
        ...config
    } as StringProperty;
}
87
+
88
/**
 * Guesses which media types a string field stores, based on file extensions.
 *
 * Each string value is classified as image, audio or video (first match wins,
 * in that order). Media types are reported only when the classified values
 * exceed two thirds of `totalDocsCount`.
 *
 * @returns The detected file types, or `false` when the field does not look
 * like a media field.
 */
function probableFileType(valuesCount: ValuesCountEntry, totalDocsCount: number): false | FileType[] {
    const endsWithAny = (text: string, extensions: string[]): boolean =>
        extensions.some((extension) => text.toString().endsWith(extension));

    const tally = { image: 0, audio: 0, video: 0 };
    valuesCount.values.forEach((candidate) => {
        if (typeof candidate !== "string") return;
        if (endsWithAny(candidate, IMAGE_EXTENSIONS)) tally.image += 1;
        else if (endsWithAny(candidate, AUDIO_EXTENSIONS)) tally.audio += 1;
        else if (endsWithAny(candidate, VIDEO_EXTENSIONS)) tally.video += 1;
    });

    const mediaTotal = tally.image + tally.audio + tally.video;
    const mediaDominates = mediaTotal > (totalDocsCount * 2) / 3;
    if (!mediaDominates) {
        return false;
    }

    const detected: FileType[] = [];
    if (tally.image > 0) detected.push("image/*");
    if (tally.audio > 0) detected.push("audio/*");
    if (tally.video > 0) detected.push("video/*");
    return detected.length > 0 ? detected : false;
}
@@ -0,0 +1,18 @@
1
+ import { PropertyValidationSchema } from "@rebasepro/types";
2
+ import { InferencePropertyBuilderProps } from "../types";
3
+
4
/**
 * Derives a validation schema for an inferred property.
 *
 * The property is marked as required when the number of collected values
 * equals the number of documents inspected.
 *
 * @returns `{ required: true }` in that case, otherwise `undefined`.
 */
export function buildValidation(props: InferencePropertyBuilderProps): PropertyValidationSchema | undefined {
    const { totalDocsCount, valuesResult } = props;

    if (!valuesResult) {
        return undefined;
    }

    const collectedCount = valuesResult.values.length;
    return collectedCount === totalDocsCount
        ? { required: true }
        : undefined;
}
@@ -0,0 +1,404 @@
1
+ import {
2
+ InferencePropertyBuilderProps,
3
+ TypesCount,
4
+ TypesCountRecord,
5
+ ValuesCountEntry,
6
+ ValuesCountRecord
7
+ } from "./types";
8
+ import { buildStringProperty } from "./builders/string_property_builder";
9
+ import { buildValidation } from "./builders/validation_builder";
10
+ import { buildReferenceProperty } from "./builders/reference_property_builder";
11
+ import { extractEnumFromValues, mergeDeep, prettifyIdentifier, resolveEnumValues } from "./util";
12
+ import { DataType, EnumValues, Properties, Property, StringProperty } from "@rebasepro/types";
13
+
14
+ export type InferenceTypeBuilder = (value: any) => DataType;
15
+
16
/**
 * Infers the properties of an entity from a list of sample documents.
 *
 * Every top-level key of every document (except keys starting with `_`) is
 * registered in a per-key type tally and a per-key value tally, which are
 * then turned into property definitions.
 *
 * @param data Sample documents; a missing list is treated as empty and falsy
 * entries are skipped (they still count towards the document total).
 * @param getType Callback that maps a raw value to its data type.
 * @returns The inferred properties, keyed by field name.
 */
export async function buildEntityPropertiesFromData(
    data: object[],
    getType: InferenceTypeBuilder
): Promise<Properties> {
    const typesCount: TypesCountRecord = {};
    const valuesCount: ValuesCountRecord = {};

    // Normalise once so the loop and the document total agree on null input
    // (previously `data.length` was read even when `data` was missing).
    const documents = data ?? [];

    for (const entry of documents) {
        if (!entry) continue;
        for (const [key, value] of Object.entries(entry)) {
            if (key.startsWith("_")) continue; // Ignore properties starting with _
            increaseMapTypeCount(typesCount, key, value, getType);
            increaseValuesCount(valuesCount, key, value, getType);
        }
    }

    return buildPropertiesFromCount(documents.length, typesCount, valuesCount);
}
35
+
36
/**
 * Refines an existing property definition using sample values of that field.
 *
 * - If the property already declares an enum, the string values found in the
 *   data are appended as new enum entries. Entries whose id is already
 *   declared are not added again, so configured labels win and ids stay
 *   unique.
 * - Otherwise a property is inferred from the data and the given `property`
 *   is deep-merged on top of it, so explicit configuration takes precedence.
 *
 * @param data Sample values for the field; a missing list is treated as empty.
 * @param property The existing property definition.
 * @param getType Callback that maps a raw value to its data type.
 */
export function buildPropertyFromData(
    data: any[],
    property: Property,
    getType: InferenceTypeBuilder
): Property {
    const typesCount: TypesCount = {};
    const valuesCount: ValuesCountRecord = {};
    const samples = data ?? [];

    samples.forEach((entry) => {
        increaseTypeCount(property.type, typesCount, entry, getType);
        increaseValuesCount(valuesCount, "inferred_prop", entry, getType);
    });

    // Undefined when no sample was processed (empty or missing data)
    const inferredValues = valuesCount["inferred_prop"];

    const enumValues = "enum" in property ? resolveEnumValues(property["enum"] as EnumValues) : undefined;
    if (enumValues) {
        const declaredIds = new Set(enumValues.map((entry) => String(entry.id)));
        const observedValues = inferredValues ? Array.from(inferredValues.valuesCount.keys()) : [];
        const newEnumValues = extractEnumFromValues(observedValues)
            .filter((entry) => !declaredIds.has(String(entry.id)));
        return {
            ...property,
            enum: [...newEnumValues, ...enumValues]
        } as StringProperty;
    }

    const generatedProperty = buildPropertyFromCount(
        "inferred_prop",
        samples.length,
        property.type,
        typesCount,
        inferredValues
    );
    return mergeDeep(generatedProperty, property);
}
66
+
67
/**
 * Computes a display order for property keys.
 *
 * Keys are ranked (case-insensitively) as follows, highest first:
 * 1. keys listed in `priorityKeys`;
 * 2. keys equal to `title` or `name`;
 * 3. keys containing `title` or `name`;
 * 4. keys containing `image` or `picture`;
 * 5. everything else.
 *
 * Keys with the same rank are ordered alphabetically (by code unit, like the
 * default `Array.prototype.sort`).
 *
 * @param properties Properties whose keys are used when no order is given.
 * @param propertiesOrder Optional explicit list of keys to order. It is not
 * modified; a new array is returned.
 * @param priorityKeys Optional keys that must come first.
 * @returns A new, sorted array of keys.
 */
export function buildPropertiesOrder(
    properties: Properties,
    propertiesOrder?: string[],
    priorityKeys?: string[]
): string[] {
    const lowerCasePriorityKeys = new Set((priorityKeys ?? []).map((key) => key.toLowerCase()));

    function propOrder(s: string): number {
        const k = s.toLowerCase();
        if (lowerCasePriorityKeys.has(k)) return 4;
        if (k === "title" || k === "name") return 3;
        if (k.includes("title") || k.includes("name")) return 2;
        if (k.includes("image") || k.includes("picture")) return 1;
        return 0;
    }

    // Copy before sorting: `sort` works in place and would otherwise reorder
    // the caller's `propertiesOrder` array.
    const keys = [...(propertiesOrder ?? Object.keys(properties))];

    return keys.sort((a, b) => {
        const byRank = propOrder(b) - propOrder(a);
        if (byRank !== 0) return byRank;
        // Alphabetical tie-break, matching the default sort order
        if (a < b) return -1;
        if (a > b) return 1;
        return 0;
    });
}
90
+
91
/**
 * Registers one observation of `fieldValue` under `type` in `typesCount`.
 *
 * - `map`: each entry of a truthy value is tallied in the nested record.
 * - `array`: the most probable element type is determined; map elements are
 *   tallied per key, any other element type increments a counter.
 * - anything else: increments the counter stored under `type`.
 *
 * @param type Data type the value was classified as
 * @param typesCount Tally to update (mutated in place)
 * @param fieldValue The observed value
 * @param getType Callback that maps a raw value to its data type
 */
function increaseTypeCount(
    type: DataType,
    typesCount: TypesCount,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {
    if (type === "map") {
        if (!fieldValue) return;
        if (!typesCount[type]) {
            typesCount[type] = {};
        }
        const nestedRecord = typesCount[type] as TypesCountRecord;
        for (const [key, value] of Object.entries(fieldValue)) {
            increaseMapTypeCount(nestedRecord, key, value, getType);
        }
        return;
    }

    if (type === "array") {
        // The array bucket is created even when the array turns out to be empty
        if (!typesCount[type]) {
            typesCount[type] = {};
        }
        const elementCounts = typesCount[type] as TypesCount;

        const hasElements = fieldValue && Array.isArray(fieldValue) && fieldValue.length > 0;
        if (!hasElements) return;

        const elementType = getMostProbableTypeInArray(fieldValue, getType);
        if (elementType === "map") {
            const elementRecord = elementCounts[elementType] || {};
            fieldValue.forEach((element: any) => {
                // Only plain object elements can be walked with Object.entries
                const isObjectElement = element && typeof element === "object" && !Array.isArray(element);
                if (!isObjectElement) return;
                Object.entries(element).forEach(([key, v]) =>
                    increaseMapTypeCount(elementRecord, key, v, getType)
                );
            });
            elementCounts[elementType] = elementRecord;
        } else {
            elementCounts[elementType] = elementCounts[elementType]
                ? Number(elementCounts[elementType]) + 1
                : 1;
        }
        return;
    }

    typesCount[type] = typesCount[type]
        ? Number(typesCount[type]) + 1
        : 1;
}
145
+
146
/**
 * Registers one observation of `fieldValue` for `key` in a per-key type tally.
 *
 * Keys starting with `_` are ignored. An entry for the key is created even
 * when the value is `null` or `undefined`; such values are not counted
 * towards any type.
 */
function increaseMapTypeCount(
    typesCountRecord: TypesCountRecord,
    key: string,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {
    if (key.startsWith("_")) return; // Ignore properties starting with _

    const typesCount: TypesCount = typesCountRecord[key] || (typesCountRecord[key] = {});

    // Nothing to classify for missing values
    if (fieldValue === null || fieldValue === undefined) return;

    increaseTypeCount(getType(fieldValue), typesCount, fieldValue, getType);
}
166
+
167
/**
 * Registers one observation of `fieldValue` for `key` in a per-key value tally.
 *
 * - Keys starting with `_` are ignored.
 * - `map` values are tallied per sub-key under the entry's `mapValues`.
 * - `array` values contribute each of their elements.
 * - Any other non-null value is appended to `values` and counted in
 *   `valuesCount`.
 *
 * Nested map tallies are stored under `mapValues`, the field declared by
 * `ValuesCountEntry` (they were previously written to an undeclared `map`
 * field).
 *
 * @param typeValuesRecord Tally to update (mutated in place)
 * @param key Field name
 * @param fieldValue The observed value
 * @param getType Callback that maps a raw value to its data type
 */
function increaseValuesCount(
    typeValuesRecord: ValuesCountRecord,
    key: string,
    fieldValue: any,
    getType: InferenceTypeBuilder
) {
    if (key.startsWith("_")) return; // Ignore properties starting with _

    const type = getType(fieldValue);

    let valuesRecord: ValuesCountEntry = typeValuesRecord[key];
    if (!valuesRecord) {
        valuesRecord = {
            values: [],
            valuesCount: new Map()
        };
        typeValuesRecord[key] = valuesRecord;
    }
    const entry = valuesRecord;

    // Appends a single value and bumps its occurrence counter
    const record = (value: any) => {
        entry.values.push(value);
        entry.valuesCount.set(value, (entry.valuesCount.get(value) ?? 0) + 1);
    };

    if (type === "map") {
        let mapValuesRecord: ValuesCountRecord | undefined = entry.mapValues;
        if (!mapValuesRecord) {
            mapValuesRecord = {};
            entry.mapValues = mapValuesRecord;
        }
        if (fieldValue) {
            const nested = mapValuesRecord;
            Object.entries(fieldValue).forEach(([subKey, value]) =>
                increaseValuesCount(nested, subKey, value, getType)
            );
        }
    } else if (type === "array") {
        if (Array.isArray(fieldValue)) {
            fieldValue.forEach((value) => record(value));
        }
    } else if (fieldValue !== null && fieldValue !== undefined) {
        record(fieldValue);
    }
}
215
+
216
/**
 * Returns the largest counter found anywhere inside a type tally.
 *
 * `map` and `array` buckets are searched recursively; every other bucket is
 * read as a number. An empty tally yields 0.
 */
function getHighestTypesCount(typesCount: TypesCount): number {
    return Object.entries(typesCount).reduce((highest: number, [type, count]) => {
        let current: number;
        switch (type) {
            case "map":
                current = getHighestRecordCount(count as TypesCountRecord);
                break;
            case "array":
                current = getHighestTypesCount(count as TypesCount);
                break;
            default:
                current = Number(count);
        }
        return current > highest ? current : highest;
    }, 0);
}
234
+
235
/**
 * Returns the largest counter found across all keys of a per-key type tally,
 * or 0 when the record is empty.
 */
function getHighestRecordCount(record: TypesCountRecord): number {
    let highest = 0;
    for (const [, typesCount] of Object.entries(record)) {
        highest = Math.max(highest, getHighestTypesCount(typesCount as TypesCount));
    }
    return highest;
}
240
+
241
/**
 * Picks the data type with the highest count in a type tally.
 *
 * `map` and `array` buckets are weighted by the highest counter nested inside
 * them. On ties the first bucket encountered wins; an empty tally yields
 * `"string"`.
 */
function getMostProbableType(typesCount: TypesCount): DataType {
    let winner: DataType = "string"; // default
    let winnerCount = -1;

    for (const [type, count] of Object.entries(typesCount)) {
        const occurrences = type === "map"
            ? getHighestRecordCount(count as TypesCountRecord)
            : type === "array"
                ? getHighestTypesCount(count as TypesCount)
                : Number(count);

        if (occurrences > winnerCount) {
            winnerCount = occurrences;
            winner = type as DataType;
        }
    }

    return winner;
}
260
+
261
/**
 * Builds a property definition for one field from its type and value tallies.
 *
 * - `map`: when the nested keys show high variability the field becomes a
 *   free-form key/value map; otherwise nested properties are inferred.
 * - `array`: the element property is inferred from the array tally.
 * - `string` / `reference`: delegated to the dedicated builders.
 * - anything else: a bare property of that type.
 *
 * Non map/array properties additionally receive a validation schema when one
 * can be inferred.
 *
 * @param key Field name; its prettified form is used as the property name
 * @param totalDocsCount Number of documents the tallies were built from
 * @param mostProbableType Data type chosen for the field
 * @param typesCount Type tally of the field
 * @param valuesResult Value tally of the field, if any
 */
function buildPropertyFromCount(
    key: string,
    totalDocsCount: number,
    mostProbableType: DataType,
    typesCount: TypesCount,
    valuesResult?: ValuesCountEntry
): Property {
    const title: string | undefined = key ? prettifyIdentifier(key) : undefined;
    const name = title ?? key ?? "";

    if (mostProbableType === "map") {
        // The key/value result used to be built and then unconditionally
        // overwritten by the regular map result; return it instead.
        if (checkTypesCountHighVariability(typesCount)) {
            return {
                type: "map",
                name,
                keyValue: true,
                properties: {}
            };
        }
        const properties = buildPropertiesFromCount(
            totalDocsCount,
            // The map bucket is absent when no map value was ever tallied
            (typesCount.map ?? {}) as TypesCountRecord,
            valuesResult ? valuesResult.mapValues : undefined
        );
        return {
            type: "map",
            name,
            properties
        };
    }

    if (mostProbableType === "array") {
        // The array bucket is absent when no array value was ever tallied
        const arrayTypesCount = (typesCount.array ?? {}) as TypesCount;
        const arrayMostProbableType = getMostProbableType(arrayTypesCount);
        const of = buildPropertyFromCount(
            key,
            totalDocsCount,
            arrayMostProbableType,
            arrayTypesCount,
            valuesResult
        );
        return {
            type: "array",
            name,
            of
        };
    }

    const propertyProps: InferencePropertyBuilderProps = {
        name: key,
        totalDocsCount,
        valuesResult
    };

    let result: Property;
    if (mostProbableType === "string") {
        result = buildStringProperty(propertyProps);
    } else if (mostProbableType === "reference") {
        result = buildReferenceProperty(propertyProps);
    } else {
        result = {
            type: mostProbableType
        } as Property;
    }

    if (title) {
        result.name = title;
    }

    const validation = buildValidation(propertyProps);
    if (validation) {
        result.validation = validation;
    }

    return result;
}
340
+
341
/**
 * Builds one property per key of a per-key type tally.
 *
 * @param totalDocsCount Number of documents the tallies were built from
 * @param typesCountRecord Per-key type tally; a missing record yields no
 * properties instead of throwing (callers pass `typesCount.map`, which is
 * absent when no map value was tallied)
 * @param valuesCountRecord Optional per-key value tally
 * @returns The inferred properties, keyed by field name
 */
function buildPropertiesFromCount(
    totalDocsCount: number,
    typesCountRecord: TypesCountRecord,
    valuesCountRecord?: ValuesCountRecord
): Properties {
    const res: Properties = {};
    for (const [key, typesCount] of Object.entries(typesCountRecord ?? {})) {
        const mostProbableType = getMostProbableType(typesCount as TypesCount);
        res[key] = buildPropertyFromCount(
            key,
            totalDocsCount,
            mostProbableType,
            typesCount as TypesCount,
            valuesCountRecord ? valuesCountRecord[key] : undefined
        );
    }
    return res;
}
359
+
360
/**
 * Returns the largest numeric counter found at any depth of a tally.
 *
 * Object values are searched recursively; everything else is read as a
 * number. An empty tally yields 0.
 */
function countMaxDocumentsUnder(typesCount: TypesCount) {
    let highest = 0;
    for (const [, value] of Object.entries(typesCount)) {
        const candidate = typeof value === "object"
            ? countMaxDocumentsUnder(value as TypesCountRecord)
            : Number(value);
        highest = Math.max(highest, candidate);
    }
    return highest;
}
371
+
372
/**
 * Determines the most frequent data type among the elements of an array.
 *
 * Every element is classified with `getType` and tallied; the winner is
 * chosen by `getMostProbableType`.
 */
function getMostProbableTypeInArray(
    array: any[],
    getType: InferenceTypeBuilder
): DataType {
    const elementTypes = array.reduce((tally: TypesCount, element: any) => {
        increaseTypeCount(getType(element), tally, element, getType);
        return tally;
    }, {} as TypesCount);
    return getMostProbableType(elementTypes);
}
382
+
383
/**
 * Tells whether the keys of a map tally vary strongly between documents.
 *
 * A key is considered rare when its highest counter is below a third of the
 * highest counter of the whole tally. The result is `true` when more than
 * half of the map's keys are rare (and `false` for a map without keys).
 */
function checkTypesCountHighVariability(typesCount: TypesCount) {
    const maxCount = countMaxDocumentsUnder(typesCount);
    const mapEntries = Object.entries(typesCount.map ?? {});
    const rareKeys = mapEntries.filter(([, value]) => countMaxDocumentsUnder(value) < maxCount / 3);
    return rareKeys.length / mapEntries.length > 0.5;
}
394
+
395
+
396
/**
 * Default classifier that maps a raw JavaScript value to a data type.
 *
 * - `null` / `undefined` and any unrecognised value: `"string"`
 * - strings, numbers, booleans: the matching primitive type
 * - arrays: `"array"`
 * - `Date` instances: `"date"` (previously classified as an empty `"map"`)
 * - any other object: `"map"`
 */
export function inferTypeFromValue(value: any): DataType {
    if (value === null || value === undefined) return "string";
    if (Array.isArray(value)) return "array";
    // Must be checked before the generic object case below
    if (value instanceof Date) return "date";

    switch (typeof value) {
        case "number":
            return "number";
        case "boolean":
            return "boolean";
        case "object":
            return "map";
        default:
            return "string";
    }
}
package/src/index.ts ADDED
@@ -0,0 +1,3 @@
1
+ export * from "./collection_builder";
2
+ export * from "./util";
3
+ export * from "./strings";