patram 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/build-graph-identity.js +86 -99
- package/lib/build-graph.js +536 -31
- package/lib/build-graph.types.ts +6 -2
- package/lib/check-directive-metadata.js +534 -0
- package/lib/check-directive-value.js +291 -0
- package/lib/check-graph.js +23 -5
- package/lib/cli-help-metadata.js +56 -16
- package/lib/command-output.js +16 -1
- package/lib/derived-summary.js +10 -8
- package/lib/directive-diagnostics.js +38 -0
- package/lib/directive-type-rules.js +133 -0
- package/lib/discover-fields.js +435 -0
- package/lib/discover-fields.types.ts +52 -0
- package/lib/document-node-identity.js +317 -0
- package/lib/format-node-header.js +9 -7
- package/lib/format-output-metadata.js +15 -23
- package/lib/layout-stored-queries.js +124 -85
- package/lib/load-patram-config.js +433 -96
- package/lib/load-patram-config.types.ts +98 -3
- package/lib/load-project-graph.js +4 -1
- package/lib/output-view.types.ts +14 -6
- package/lib/parse-cli-arguments.types.ts +1 -1
- package/lib/parse-where-clause.js +344 -107
- package/lib/parse-where-clause.types.ts +25 -8
- package/lib/patram-cli.js +68 -4
- package/lib/patram-config.js +31 -31
- package/lib/patram-config.types.ts +10 -4
- package/lib/query-graph.js +269 -40
- package/lib/query-inspection.js +440 -60
- package/lib/render-field-discovery.js +184 -0
- package/lib/render-json-output.js +21 -22
- package/lib/render-output-view.js +301 -34
- package/lib/render-plain-output.js +1 -1
- package/lib/render-rich-output.js +1 -1
- package/lib/render-rich-source.js +245 -14
- package/lib/resolve-patram-graph-config.js +15 -9
- package/lib/show-document.js +66 -9
- package/package.json +5 -5
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @import { MetadataFieldConfig } from './load-patram-config.types.ts';
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { isPathLikeTarget } from './claim-helpers.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Decide whether a raw directive value is acceptable for a configured field
 * type.
 *
 * An empty value is rejected for every type. `string` and `glob` accept any
 * non-empty value, `integer` requires an optionally negative run of digits,
 * and `path`, `date`, and `date_time` defer to dedicated checks.
 *
 * @param {MetadataFieldConfig} type_definition
 * @param {string} directive_value
 * @returns {boolean}
 * @throws {Error} When the configured type has no rule here.
 */
export function isDirectiveValueValid(type_definition, directive_value) {
  if (directive_value.length === 0) {
    return false;
  }

  const { type: type_name } = type_definition;

  if (type_name === 'string' || type_name === 'glob') {
    return true;
  }

  if (type_name === 'integer') {
    return /^-?\d+$/du.test(directive_value);
  }

  if (type_name === 'path') {
    return isPathLikeTarget(directive_value);
  }

  if (type_name === 'date') {
    return isValidDateValue(directive_value);
  }

  if (type_name === 'date_time') {
    return isValidDateTimeValue(directive_value);
  }

  throw new Error(`Unsupported directive type "${type_name}".`);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Build the diagnostic text for a directive whose value does not match its
 * configured type.
 *
 * @param {string} directive_name
 * @param {Exclude<MetadataFieldConfig['type'], 'enum'>} type_name
 * @returns {string}
 * @throws {Error} When no message exists for the given type name.
 */
export function getInvalidTypeMessage(directive_name, type_name) {
  /** @type {Record<string, string>} */
  const requirement_by_type = {
    string: 'must be a non-empty string.',
    integer: 'must be a base-10 integer.',
    path: 'must be a path-like string.',
    glob: 'must be a non-empty glob string.',
    date: 'must use YYYY-MM-DD.',
    date_time: 'must use YYYY-MM-DD HH:MM.',
  };

  // Own-key check so inherited names such as "toString" stay unsupported.
  if (!Object.hasOwn(requirement_by_type, type_name)) {
    throw new Error(`Unsupported directive type "${type_name}".`);
  }

  return `Directive "${directive_name}" ${requirement_by_type[type_name]}`;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Wrap each value in double quotes and join the results with ", ".
 *
 * Quotes inside a value are not escaped.
 *
 * @param {string[]} values
 * @returns {string}
 */
export function formatQuotedList(values) {
  const quoted_values = [];

  for (const value of values) {
    quoted_values.push(`"${value}"`);
  }

  return quoted_values.join(', ');
}
|
|
66
|
+
|
|
67
|
+
/**
 * Check that a value is written as `YYYY-MM-DD` and names a date that exists
 * on the calendar.
 *
 * @param {string} directive_value
 * @returns {boolean}
 */
function isValidDateValue(directive_value) {
  const date_parts = directive_value.match(
    /^(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})$/du,
  )?.groups;

  if (!date_parts) {
    return false;
  }

  const { year, month, day } = date_parts;

  return isRealCalendarDate(Number(year), Number(month), Number(day));
}
|
|
86
|
+
|
|
87
|
+
/**
 * Check that a value is written as `YYYY-MM-DD HH:MM`, with an hour of at most
 * 23, a minute of at most 59, and a date that exists on the calendar.
 *
 * @param {string} directive_value
 * @returns {boolean}
 */
function isValidDateTimeValue(directive_value) {
  const date_time_parts = directive_value.match(
    /^(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2}) (?<hour>\d{2}):(?<minute>\d{2})$/du,
  )?.groups;

  if (!date_time_parts) {
    return false;
  }

  const { year, month, day, hour, minute } = date_time_parts;
  const is_clock_time_valid = Number(hour) <= 23 && Number(minute) <= 59;

  // The calendar check only runs once the clock part is known to be valid.
  return (
    is_clock_time_valid &&
    isRealCalendarDate(Number(year), Number(month), Number(day))
  );
}
|
|
114
|
+
|
|
115
|
+
/**
 * Check that a year/month/day triple names a date that exists on the calendar.
 *
 * The candidate date is built in UTC and read back; a day that overflows its
 * month (such as 31 April or 29 February in a non-leap year) rolls over into
 * the next month and therefore fails the comparison.
 *
 * @param {number} year Full year, taken literally (50 means year 50).
 * @param {number} month Month number from 1 to 12.
 * @param {number} day Day of the month from 1 to 31.
 * @returns {boolean}
 */
function isRealCalendarDate(year, month, day) {
  if (month < 1 || month > 12 || day < 1 || day > 31) {
    return false;
  }

  // Date.UTC() maps years 0-99 onto 1900-1999, which would make every date in
  // those years fail the round-trip below. setUTCFullYear() uses the year
  // as given.
  const candidate_date = new Date(0);
  candidate_date.setUTCFullYear(year, month - 1, day);

  return (
    candidate_date.getUTCFullYear() === year &&
    candidate_date.getUTCMonth() === month - 1 &&
    candidate_date.getUTCDate() === day
  );
}
|
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
/* eslint-disable max-lines, max-lines-per-function */
|
|
2
|
+
/**
|
|
3
|
+
* @import { ClaimOrigin, PatramClaim } from './parse-claims.types.ts';
|
|
4
|
+
* @import {
|
|
5
|
+
* DiscoveredFieldMultiplicity,
|
|
6
|
+
* DiscoveredFieldTypeName,
|
|
7
|
+
* FieldDiscoveryClassUsage,
|
|
8
|
+
* FieldDiscoveryEvidenceReference,
|
|
9
|
+
* FieldDiscoveryMultiplicitySuggestion,
|
|
10
|
+
* FieldDiscoveryResult,
|
|
11
|
+
* FieldDiscoverySuggestion,
|
|
12
|
+
* FieldDiscoveryTypeSuggestion,
|
|
13
|
+
* } from './discover-fields.types.ts';
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { readFile } from 'node:fs/promises';
|
|
17
|
+
import process from 'node:process';
|
|
18
|
+
import { resolve } from 'node:path';
|
|
19
|
+
|
|
20
|
+
import { DEFAULT_INCLUDE_PATTERNS } from './source-file-defaults.js';
|
|
21
|
+
import { listSourceFiles } from './list-source-files.js';
|
|
22
|
+
import { parseSourceFile } from './parse-claims.js';
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Field discovery from source claims.
|
|
26
|
+
*
|
|
27
|
+
* Scans the repository source files directly, infers likely metadata fields,
|
|
28
|
+
* and reports advisory suggestions without requiring repo config to load.
|
|
29
|
+
*
|
|
30
|
+
* Kind: discovery
|
|
31
|
+
* Status: active
|
|
32
|
+
* Tracked in: ../docs/plans/v1/field-model-redesign.md
|
|
33
|
+
* Decided by: ../docs/decisions/field-discovery-workflow.md
|
|
34
|
+
* @patram
|
|
35
|
+
* @see {@link ./render-field-discovery.js}
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
/**
 * Candidate field types in priority order. When two types score the same
 * confidence for a field, the one listed first wins.
 */
const TYPE_NAME_ORDER = /** @type {const} */ ([
  'integer',
  'date_time',
  'date',
  'glob',
  'path',
  'enum',
  'string',
]);

/** Optionally negative run of decimal digits. */
const INTEGER_PATTERN = /^-?\d+$/du;
/** Exactly `YYYY-MM-DD`; the calendar validity of the date is not checked. */
const DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$/du;
/**
 * Starts with `YYYY-MM-DD HH:MM` or `YYYY-MM-DDTHH:MM`.
 *
 * The space form is the one directive validation accepts for `date_time`
 * values, so it has to be recognized here; the `T` form is kept so values
 * that matched before still match. Trailing text (seconds, zone) is allowed.
 */
const DATE_TIME_PATTERN = /^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}/du;
/** Lowercase identifier made of letters, digits, `_`, and `-` (not leading). */
const ENUM_PATTERN = /^[a-z0-9_][a-z0-9_-]*$/du;
/** Bare lowercase file name with an extension, such as `readme.md`. */
const PATH_PATTERN = /^[a-z0-9_.-]+\.[a-z0-9]+$/du;
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* @typedef {FieldDiscoveryClassUsage & { confidence: number }} InferredFieldClassUsage
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* @typedef {(value: string) => number} FieldTypeScorer
|
|
60
|
+
*/
|
|
61
|
+
|
|
62
|
+
/**
 * Discover likely field schema from source files.
 *
 * Lists the files matched by the default include patterns under
 * `project_directory`, parses each one into claims, groups the directive
 * claims by name, and turns every group into a suggestion. Suggestions whose
 * name appears in `options.defined_field_names` are left out. The result is
 * ordered by descending confidence, then by name.
 *
 * @param {string} [project_directory]
 * @param {{ defined_field_names?: ReadonlySet<string> }} [options]
 * @returns {Promise<FieldDiscoveryResult>}
 */
export async function discoverFields(
  project_directory = process.cwd(),
  options,
) {
  const defined_field_names = options?.defined_field_names ?? new Set();
  const source_file_paths = await listSourceFiles(
    DEFAULT_INCLUDE_PATTERNS,
    project_directory,
  );

  const parse_results = await Promise.all(
    source_file_paths.map(async (source_file_path) => {
      const absolute_path = resolve(project_directory, source_file_path);
      const source_text = await readFile(absolute_path, 'utf8');
      const { claims } = parseSourceFile({
        path: source_file_path,
        source: source_text,
      });

      return { claims, path: source_file_path };
    }),
  );

  /** @type {Map<string, FieldBucket>} */
  const field_buckets = new Map();
  let claim_count = 0;

  for (const { claims } of parse_results) {
    claim_count += claims.length;

    // First pass: every non-empty `kind` directive names a class of the file.
    /** @type {Set<string>} */
    const document_classes = new Set();

    for (const claim of claims) {
      const is_kind_directive =
        claim.type === 'directive' &&
        claim.name === 'kind' &&
        typeof claim.value === 'string' &&
        claim.value.length > 0;

      if (is_kind_directive) {
        document_classes.add(claim.value);
      }
    }

    // Second pass: record each usable directive under its field name.
    for (const claim of claims) {
      if (
        claim.type !== 'directive' ||
        !claim.name ||
        claim.name.startsWith('$')
      ) {
        continue;
      }

      if (typeof claim.value !== 'string' || claim.value.length === 0) {
        continue;
      }

      /** @type {FieldObservation} */
      const field_observation = {
        class_names: new Set(document_classes),
        document_id: claim.document_id,
        name: claim.name,
        origin: claim.origin,
        value: claim.value,
      };
      let field_bucket = field_buckets.get(claim.name);

      if (field_bucket === undefined) {
        field_bucket = { name: claim.name, observations: [] };
        field_buckets.set(claim.name, field_bucket);
      }

      field_bucket.observations.push(field_observation);
    }
  }

  /** @type {FieldDiscoverySuggestion[]} */
  const fields = [];

  for (const field_bucket of field_buckets.values()) {
    const field_suggestion = buildFieldSuggestion(field_bucket);

    if (!defined_field_names.has(field_suggestion.name)) {
      fields.push(field_suggestion);
    }
  }

  fields.sort((left_suggestion, right_suggestion) => {
    if (left_suggestion.confidence === right_suggestion.confidence) {
      return left_suggestion.name.localeCompare(right_suggestion.name, 'en');
    }

    return right_suggestion.confidence - left_suggestion.confidence;
  });

  return {
    fields,
    summary: {
      claim_count,
      count: fields.length,
      source_file_count: source_file_paths.length,
    },
  };
}
|
|
169
|
+
|
|
170
|
+
/**
 * Turn all observations of one field name into a single suggestion.
 *
 * The overall confidence is the mean of the type, multiplicity, and class
 * usage confidences, rounded to two decimals. Conflicting evidence lists the
 * observations whose value scores exactly 0 against the chosen type.
 *
 * @param {FieldBucket} field_bucket
 * @returns {FieldDiscoverySuggestion}
 */
function buildFieldSuggestion(field_bucket) {
  const { name, observations } = field_bucket;
  const likely_type = inferFieldType(observations);
  const likely_multiplicity = inferFieldMultiplicity(observations);
  const { classes, confidence: class_usage_confidence } =
    inferFieldClassUsage(observations);
  const evidence_references = buildEvidenceReferences(observations);
  const mismatched_observations = observations.filter(
    (field_observation) =>
      scoreFieldValue(field_observation.value, likely_type.name) === 0,
  );
  const conflicting_evidence = buildEvidenceReferences(mismatched_observations);
  const mean_confidence =
    (likely_type.confidence +
      likely_multiplicity.confidence +
      class_usage_confidence) /
    3;

  return {
    confidence: Math.round(mean_confidence * 100) / 100,
    conflicting_evidence,
    evidence_references,
    likely_class_usage: { classes },
    likely_multiplicity,
    likely_type,
    name,
  };
}
|
|
207
|
+
|
|
208
|
+
/**
 * Convert observations into evidence references (origin column, line, and
 * path plus the observed value), sorted with `compareEvidenceReferences`.
 *
 * @param {FieldObservation[]} observations
 * @returns {FieldDiscoveryEvidenceReference[]}
 */
function buildEvidenceReferences(observations) {
  /** @type {FieldDiscoveryEvidenceReference[]} */
  const evidence_references = [];

  for (const { origin, value } of observations) {
    evidence_references.push({
      column: origin.column,
      line: origin.line,
      path: origin.path,
      value,
    });
  }

  evidence_references.sort(compareEvidenceReferences);

  return evidence_references;
}
|
|
222
|
+
|
|
223
|
+
/**
 * Guess whether a field holds one value or several per document.
 *
 * As soon as any document carries two or more distinct values the field is
 * reported as `multiple` with confidence 1. Otherwise it is `single`, with
 * confidence 0.9 when more than one document was seen and 0.8 when not.
 *
 * @param {FieldObservation[]} observations
 * @returns {FieldDiscoveryMultiplicitySuggestion}
 */
function inferFieldMultiplicity(observations) {
  /** @type {Map<string, Set<string>>} */
  const values_by_document = new Map();

  for (const { document_id, value } of observations) {
    let document_values = values_by_document.get(document_id);

    if (!document_values) {
      document_values = new Set();
      values_by_document.set(document_id, document_values);
    }

    document_values.add(value);
  }

  let has_multi_valued_document = false;
  let single_valued_document_count = 0;

  for (const document_values of values_by_document.values()) {
    if (document_values.size > 1) {
      has_multi_valued_document = true;
      break;
    }

    if (document_values.size === 1) {
      single_valued_document_count += 1;
    }
  }

  if (has_multi_valued_document) {
    return { confidence: 1, name: 'multiple' };
  }

  const is_seen_across_documents =
    values_by_document.size > 1 && single_valued_document_count > 0;
  const raw_confidence = is_seen_across_documents ? 0.9 : 0.8;

  return {
    confidence: Math.round(raw_confidence * 100) / 100,
    name: 'single',
  };
}
|
|
268
|
+
|
|
269
|
+
/**
 * Collect the document classes a field was observed under.
 *
 * Confidence is the share of observations that came with at least one class,
 * rounded to two decimals. When no observation has a class, the result falls
 * back to the single class `document` with confidence 0.2.
 *
 * @param {FieldObservation[]} observations
 * @returns {InferredFieldClassUsage}
 */
function inferFieldClassUsage(observations) {
  /** @type {Set<string>} */
  const seen_class_names = new Set();
  let documented_observation_count = 0;

  for (const { class_names } of observations) {
    if (class_names.size === 0) {
      continue;
    }

    documented_observation_count += 1;

    for (const class_name of class_names) {
      seen_class_names.add(class_name);
    }
  }

  if (seen_class_names.size === 0) {
    return { confidence: 0.2, classes: ['document'] };
  }

  const documented_share =
    documented_observation_count / Math.max(observations.length, 1);
  const classes = [...seen_class_names];

  classes.sort((left_class, right_class) =>
    left_class.localeCompare(right_class, 'en'),
  );

  return {
    confidence: Math.round(documented_share * 100) / 100,
    classes,
  };
}
|
|
307
|
+
|
|
308
|
+
/**
 * Pick the field type that best matches the observed values.
 *
 * Every type in `TYPE_NAME_ORDER` is scored; the highest confidence wins and
 * ties go to the type listed earlier.
 *
 * @param {FieldObservation[]} observations
 * @returns {FieldDiscoveryTypeSuggestion}
 */
function inferFieldType(observations) {
  /** @type {FieldDiscoveryTypeSuggestion | undefined} */
  let best_candidate;

  for (const type_name of TYPE_NAME_ORDER) {
    const confidence = scoreFieldType(observations, type_name);

    // Strictly greater, so an earlier type keeps the lead on equal scores.
    if (
      best_candidate === undefined ||
      confidence > best_candidate.confidence
    ) {
      best_candidate = { confidence, name: type_name };
    }
  }

  return best_candidate;
}
|
|
332
|
+
|
|
333
|
+
/**
 * Sort comparator for evidence references: by path, then line, then column,
 * then value. Paths and values are compared with the `en` locale.
 *
 * @param {FieldDiscoveryEvidenceReference} left_reference
 * @param {FieldDiscoveryEvidenceReference} right_reference
 * @returns {number}
 */
function compareEvidenceReferences(left_reference, right_reference) {
  const path_order = left_reference.path.localeCompare(
    right_reference.path,
    'en',
  );

  return path_order !== 0
    ? path_order
    : left_reference.line !== right_reference.line
      ? left_reference.line - right_reference.line
      : left_reference.column !== right_reference.column
        ? left_reference.column - right_reference.column
        : left_reference.value.localeCompare(right_reference.value, 'en');
}
|
|
358
|
+
|
|
359
|
+
/**
 * Average the per-value scores of one field type over all observations,
 * rounded to two decimals. An empty observation list scores 0.
 *
 * @param {FieldObservation[]} observations
 * @param {DiscoveredFieldTypeName} field_type_name
 * @returns {number}
 */
function scoreFieldType(observations, field_type_name) {
  const observation_count = observations.length;

  if (observation_count === 0) {
    return 0;
  }

  let total_score = 0;

  for (const { value } of observations) {
    total_score += scoreFieldValue(value, field_type_name);
  }

  return Math.round((total_score / observation_count) * 100) / 100;
}
|
|
377
|
+
|
|
378
|
+
/**
 * Score a single value against one field type using the matching entry of
 * `FIELD_TYPE_SCORERS`. A type without a scorer yields 0.
 *
 * @param {string} value
 * @param {DiscoveredFieldTypeName} field_type_name
 * @returns {number}
 */
function scoreFieldValue(value, field_type_name) {
  const score_value = FIELD_TYPE_SCORERS[field_type_name];

  if (!score_value) {
    return 0;
  }

  return score_value(value);
}
|
|
387
|
+
|
|
388
|
+
/**
 * Report whether a value contains one of the characters `*`, `?`, `[`, `]`.
 *
 * @param {string} value
 * @returns {boolean}
 */
function hasGlobCharacter(value) {
  return ['*', '?', '[', ']'].some((character) => value.includes(character));
}

/**
 * Per-type scorers. Each returns how well one value fits the type: 1 for a
 * match and 0 for none, except that `string` always returns 0.5 and a
 * path-like value containing glob characters returns 0.8 for `path`.
 *
 * @type {Record<DiscoveredFieldTypeName, FieldTypeScorer>}
 */
const FIELD_TYPE_SCORERS = {
  date(value) {
    return DATE_PATTERN.test(value) ? 1 : 0;
  },
  date_time(value) {
    return DATE_TIME_PATTERN.test(value) ? 1 : 0;
  },
  enum(value) {
    // ENUM_PATTERN has no space in its character classes, so a match already
    // guarantees the value contains none.
    return ENUM_PATTERN.test(value) ? 1 : 0;
  },
  glob(value) {
    return hasGlobCharacter(value) ? 1 : 0;
  },
  integer(value) {
    return INTEGER_PATTERN.test(value) ? 1 : 0;
  },
  path(value) {
    // Any value with a "/" counts as path-like, which also covers the
    // "docs/", "lib/", and "test/" prefixes.
    const is_path_like = value.includes('/') || PATH_PATTERN.test(value);

    if (!is_path_like) {
      return 0;
    }

    return hasGlobCharacter(value) ? 0.8 : 1;
  },
  string() {
    return 0.5;
  },
};
|
|
419
|
+
|
|
420
|
+
/**
|
|
421
|
+
* @typedef {{
|
|
422
|
+
* class_names: Set<string>,
|
|
423
|
+
* document_id: string,
|
|
424
|
+
* name: string,
|
|
425
|
+
* origin: ClaimOrigin,
|
|
426
|
+
* value: string,
|
|
427
|
+
* }} FieldObservation
|
|
428
|
+
*/
|
|
429
|
+
|
|
430
|
+
/**
|
|
431
|
+
* @typedef {{
|
|
432
|
+
* name: string,
|
|
433
|
+
* observations: FieldObservation[],
|
|
434
|
+
* }} FieldBucket
|
|
435
|
+
*/
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/** Field types that discovery can suggest. */
export type DiscoveredFieldTypeName =
  | 'date'
  | 'date_time'
  | 'enum'
  | 'glob'
  | 'integer'
  | 'path'
  | 'string';

/** Whether a field is suggested to hold one value or several. */
export type DiscoveredFieldMultiplicity = 'multiple' | 'single';

/** Location of one observed value together with the value itself. */
export interface FieldDiscoveryEvidenceReference {
  column: number;
  line: number;
  path: string;
  value: string;
}

/** Class names a field is suggested to belong to. */
export interface FieldDiscoveryClassUsage {
  classes: string[];
}

/** Suggested field type with its confidence. */
export interface FieldDiscoveryTypeSuggestion {
  confidence: number;
  name: DiscoveredFieldTypeName;
}

/** Suggested multiplicity with its confidence. */
export interface FieldDiscoveryMultiplicitySuggestion {
  confidence: number;
  name: DiscoveredFieldMultiplicity;
}

/** Everything discovery reports about a single field name. */
export interface FieldDiscoverySuggestion {
  /** Combined confidence for the suggestion as a whole. */
  confidence: number;
  /** Evidence that disagrees with the suggested type. */
  conflicting_evidence: FieldDiscoveryEvidenceReference[];
  /** All evidence collected for the field. */
  evidence_references: FieldDiscoveryEvidenceReference[];
  likely_class_usage: FieldDiscoveryClassUsage;
  likely_multiplicity: FieldDiscoveryMultiplicitySuggestion;
  likely_type: FieldDiscoveryTypeSuggestion;
  name: string;
}

/** Totals for one discovery run. */
export interface FieldDiscoverySummary {
  claim_count: number;
  /** Number of entries in `fields`. */
  count: number;
  source_file_count: number;
}

/** Result of a discovery run: the suggestions plus summary totals. */
export interface FieldDiscoveryResult {
  fields: FieldDiscoverySuggestion[];
  summary: FieldDiscoverySummary;
}
|