patram 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/lib/build-graph-identity.js +86 -99
  2. package/lib/build-graph.js +536 -31
  3. package/lib/build-graph.types.ts +6 -2
  4. package/lib/check-directive-metadata.js +534 -0
  5. package/lib/check-directive-value.js +291 -0
  6. package/lib/check-graph.js +23 -5
  7. package/lib/cli-help-metadata.js +56 -16
  8. package/lib/command-output.js +16 -1
  9. package/lib/derived-summary.js +10 -8
  10. package/lib/directive-diagnostics.js +38 -0
  11. package/lib/directive-type-rules.js +133 -0
  12. package/lib/discover-fields.js +435 -0
  13. package/lib/discover-fields.types.ts +52 -0
  14. package/lib/document-node-identity.js +317 -0
  15. package/lib/format-node-header.js +9 -7
  16. package/lib/format-output-metadata.js +15 -23
  17. package/lib/layout-stored-queries.js +124 -85
  18. package/lib/load-patram-config.js +433 -96
  19. package/lib/load-patram-config.types.ts +98 -3
  20. package/lib/load-project-graph.js +4 -1
  21. package/lib/output-view.types.ts +14 -6
  22. package/lib/parse-cli-arguments.types.ts +1 -1
  23. package/lib/parse-where-clause.js +344 -107
  24. package/lib/parse-where-clause.types.ts +25 -8
  25. package/lib/patram-cli.js +68 -4
  26. package/lib/patram-config.js +31 -31
  27. package/lib/patram-config.types.ts +10 -4
  28. package/lib/query-graph.js +269 -40
  29. package/lib/query-inspection.js +440 -60
  30. package/lib/render-field-discovery.js +184 -0
  31. package/lib/render-json-output.js +21 -22
  32. package/lib/render-output-view.js +301 -34
  33. package/lib/render-plain-output.js +1 -1
  34. package/lib/render-rich-output.js +1 -1
  35. package/lib/render-rich-source.js +245 -14
  36. package/lib/resolve-patram-graph-config.js +15 -9
  37. package/lib/show-document.js +66 -9
  38. package/package.json +5 -5
@@ -0,0 +1,133 @@
1
+ /**
2
+ * @import { MetadataFieldConfig } from './load-patram-config.types.ts';
3
+ */
4
+
5
+ import { isPathLikeTarget } from './claim-helpers.js';
6
+
7
/**
 * Check whether a raw directive value is acceptable for a configured field
 * type.
 *
 * Empty values are rejected for every type. `string` and `glob` accept any
 * non-empty value; the remaining supported types are checked by format.
 *
 * @param {MetadataFieldConfig} type_definition
 * @param {string} directive_value
 * @returns {boolean}
 * @throws {Error} When the type is not one of the six handled here.
 */
export function isDirectiveValueValid(type_definition, directive_value) {
  // The emptiness check comes first, so an empty value never reaches the
  // unsupported-type error below.
  if (directive_value.length === 0) {
    return false;
  }

  const type_name = type_definition.type;

  if (type_name === 'string' || type_name === 'glob') {
    return true;
  }

  if (type_name === 'integer') {
    return /^-?\d+$/du.test(directive_value);
  }

  if (type_name === 'path') {
    return isPathLikeTarget(directive_value);
  }

  if (type_name === 'date') {
    return isValidDateValue(directive_value);
  }

  if (type_name === 'date_time') {
    return isValidDateTimeValue(directive_value);
  }

  throw new Error(`Unsupported directive type "${type_definition.type}".`);
}
34
+
35
/**
 * Build the diagnostic text shown when a directive value does not match its
 * configured type.
 *
 * @param {string} directive_name
 * @param {Exclude<MetadataFieldConfig['type'], 'enum'>} type_name
 * @returns {string}
 * @throws {Error} When no message exists for `type_name`.
 */
export function getInvalidTypeMessage(directive_name, type_name) {
  /** @type {Map<string, string>} */
  const message_by_type = new Map([
    ['string', `Directive "${directive_name}" must be a non-empty string.`],
    ['integer', `Directive "${directive_name}" must be a base-10 integer.`],
    ['path', `Directive "${directive_name}" must be a path-like string.`],
    ['glob', `Directive "${directive_name}" must be a non-empty glob string.`],
    ['date', `Directive "${directive_name}" must use YYYY-MM-DD.`],
    ['date_time', `Directive "${directive_name}" must use YYYY-MM-DD HH:MM.`],
  ]);
  const message = message_by_type.get(type_name);

  if (message === undefined) {
    throw new Error(`Unsupported directive type "${type_name}".`);
  }

  return message;
}
58
+
59
/**
 * Render values as a comma-separated list with each entry wrapped in double
 * quotes, e.g. `"a", "b"`.
 *
 * @param {string[]} values
 * @returns {string}
 */
export function formatQuotedList(values) {
  const quoteValue = (/** @type {string} */ value) => `"${value}"`;
  const quoted_values = values.map(quoteValue);

  return quoted_values.join(', ');
}
66
+
67
/**
 * Accept only `YYYY-MM-DD` strings that name a real calendar day.
 *
 * @param {string} directive_value
 * @returns {boolean}
 */
function isValidDateValue(directive_value) {
  const date_parts = /^(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})$/du.exec(
    directive_value,
  )?.groups;

  if (!date_parts) {
    return false;
  }

  const { year, month, day } = date_parts;

  return isRealCalendarDate(Number(year), Number(month), Number(day));
}
86
+
87
/**
 * Accept only `YYYY-MM-DD HH:MM` strings (single space separator) whose time
 * is within 00:00-23:59 and whose date is a real calendar day.
 *
 * @param {string} directive_value
 * @returns {boolean}
 */
function isValidDateTimeValue(directive_value) {
  const date_time_parts =
    /^(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2}) (?<hour>\d{2}):(?<minute>\d{2})$/du.exec(
      directive_value,
    )?.groups;

  if (!date_time_parts) {
    return false;
  }

  const { year, month, day, hour, minute } = date_time_parts;

  // The clock range is checked before the calendar lookup.
  if (Number(hour) > 23 || Number(minute) > 59) {
    return false;
  }

  return isRealCalendarDate(Number(year), Number(month), Number(day));
}
114
+
115
/**
 * Report whether year/month/day name a day that exists on the (proleptic)
 * Gregorian calendar, e.g. rejecting 2023-02-29 and 2024-04-31.
 *
 * @param {number} year Full year, including years below 100.
 * @param {number} month Month number, 1-12.
 * @param {number} day Day of month, 1-31.
 * @returns {boolean}
 */
function isRealCalendarDate(year, month, day) {
  if (month < 1 || month > 12 || day < 1 || day > 31) {
    return false;
  }

  // `Date.UTC` treats years 0-99 as 1900-1999, which made every date in
  // those years fail the round-trip comparison below. `setUTCFullYear`
  // takes the year literally, so start from the epoch and set the fields.
  const candidate_date = new Date(0);

  candidate_date.setUTCFullYear(year, month - 1, day);

  // An overflowing day (e.g. Feb 30) rolls into the next month, so the
  // round-tripped fields only match when the input was a real date.
  return (
    candidate_date.getUTCFullYear() === year &&
    candidate_date.getUTCMonth() === month - 1 &&
    candidate_date.getUTCDate() === day
  );
}
@@ -0,0 +1,435 @@
1
+ /* eslint-disable max-lines, max-lines-per-function */
2
+ /**
3
+ * @import { ClaimOrigin, PatramClaim } from './parse-claims.types.ts';
4
+ * @import {
5
+ * DiscoveredFieldMultiplicity,
6
+ * DiscoveredFieldTypeName,
7
+ * FieldDiscoveryClassUsage,
8
+ * FieldDiscoveryEvidenceReference,
9
+ * FieldDiscoveryMultiplicitySuggestion,
10
+ * FieldDiscoveryResult,
11
+ * FieldDiscoverySuggestion,
12
+ * FieldDiscoveryTypeSuggestion,
13
+ * } from './discover-fields.types.ts';
14
+ */
15
+
16
+ import { readFile } from 'node:fs/promises';
17
+ import process from 'node:process';
18
+ import { resolve } from 'node:path';
19
+
20
+ import { DEFAULT_INCLUDE_PATTERNS } from './source-file-defaults.js';
21
+ import { listSourceFiles } from './list-source-files.js';
22
+ import { parseSourceFile } from './parse-claims.js';
23
+
24
+ /**
25
+ * Field discovery from source claims.
26
+ *
27
+ * Scans the repository source files directly, infers likely metadata fields,
28
+ * and reports advisory suggestions without requiring repo config to load.
29
+ *
30
+ * Kind: discovery
31
+ * Status: active
32
+ * Tracked in: ../docs/plans/v1/field-model-redesign.md
33
+ * Decided by: ../docs/decisions/field-discovery-workflow.md
34
+ * @patram
35
+ * @see {@link ./render-field-discovery.js}
36
+ */
37
+
38
// Tie-break order for type inference: when two types score the same, the
// one listed earlier wins.
const TYPE_NAME_ORDER = /** @type {const} */ ([
  'integer',
  'date_time',
  'date',
  'glob',
  'path',
  'enum',
  'string',
]);

const INTEGER_PATTERN = /^-?\d+$/du;
const DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$/du;
// Accept a space as well as `T` between date and time: the directive
// validator's date_time format is `YYYY-MM-DD HH:MM`, so a `T`-only pattern
// never recognised values that actually validate. The pattern stays
// unanchored at the end so trailing seconds or offsets still match.
const DATE_TIME_PATTERN = /^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}/du;
const ENUM_PATTERN = /^[a-z0-9_][a-z0-9_-]*$/du;
const PATH_PATTERN = /^[a-z0-9_.-]+\.[a-z0-9]+$/du;
53
+
54
+ /**
55
+ * @typedef {FieldDiscoveryClassUsage & { confidence: number }} InferredFieldClassUsage
56
+ */
57
+
58
+ /**
59
+ * @typedef {(value: string) => number} FieldTypeScorer
60
+ */
61
+
62
/**
 * Discover likely field schema from source files.
 *
 * Lists the default source files under `project_directory`, parses each one
 * into claims, groups non-empty string directive values by directive name,
 * and turns every group into an advisory suggestion. Directives whose name
 * starts with `$` are ignored, and names present in
 * `options.defined_field_names` are left out of the result.
 *
 * @param {string} [project_directory]
 * @param {{ defined_field_names?: ReadonlySet<string> }} [options]
 * @returns {Promise<FieldDiscoveryResult>}
 */
export async function discoverFields(
  project_directory = process.cwd(),
  options,
) {
  const known_field_names = options?.defined_field_names ?? new Set();
  const source_file_paths = await listSourceFiles(
    DEFAULT_INCLUDE_PATTERNS,
    project_directory,
  );
  const parsed_files = await Promise.all(
    source_file_paths.map(async (relative_path) => {
      const file_text = await readFile(
        resolve(project_directory, relative_path),
        'utf8',
      );
      const { claims } = parseSourceFile({
        path: relative_path,
        source: file_text,
      });

      return { claims, path: relative_path };
    }),
  );

  /** @type {Map<string, FieldBucket>} */
  const buckets_by_name = new Map();
  let claim_count = 0;

  for (const { claims } of parsed_files) {
    claim_count += claims.length;

    // Collect every non-empty `kind` value of the file first, so each
    // observation from that file carries all of them regardless of order.
    /** @type {Set<string>} */
    const file_classes = new Set();

    for (const claim of claims) {
      if (
        claim.type === 'directive' &&
        claim.name === 'kind' &&
        typeof claim.value === 'string' &&
        claim.value.length > 0
      ) {
        file_classes.add(claim.value);
      }
    }

    for (const claim of claims) {
      const is_field_directive =
        claim.type === 'directive' &&
        Boolean(claim.name) &&
        !claim.name.startsWith('$') &&
        typeof claim.value === 'string' &&
        claim.value.length > 0;

      if (!is_field_directive) {
        continue;
      }

      let bucket = buckets_by_name.get(claim.name);

      if (!bucket) {
        bucket = { name: claim.name, observations: [] };
        buckets_by_name.set(claim.name, bucket);
      }

      bucket.observations.push({
        class_names: new Set(file_classes),
        document_id: claim.document_id,
        name: claim.name,
        origin: claim.origin,
        value: claim.value,
      });
    }
  }

  /** @type {FieldDiscoverySuggestion[]} */
  const fields = [];

  for (const bucket of buckets_by_name.values()) {
    const field_suggestion = buildFieldSuggestion(bucket);

    if (!known_field_names.has(field_suggestion.name)) {
      fields.push(field_suggestion);
    }
  }

  // Highest confidence first; equal confidence falls back to name order.
  fields.sort((left_suggestion, right_suggestion) => {
    if (left_suggestion.confidence !== right_suggestion.confidence) {
      return right_suggestion.confidence - left_suggestion.confidence;
    }

    return left_suggestion.name.localeCompare(right_suggestion.name, 'en');
  });

  return {
    fields,
    summary: {
      claim_count,
      count: fields.length,
      source_file_count: source_file_paths.length,
    },
  };
}
169
+
170
/**
 * Turn all observations of one field name into a single suggestion.
 *
 * The overall confidence is the mean of the type, multiplicity and class
 * usage confidences, rounded to two decimals. Observations that score zero
 * for the chosen type are reported as conflicting evidence.
 *
 * @param {FieldBucket} field_bucket
 * @returns {FieldDiscoverySuggestion}
 */
function buildFieldSuggestion(field_bucket) {
  const { name, observations } = field_bucket;
  const likely_type = inferFieldType(observations);
  const likely_multiplicity = inferFieldMultiplicity(observations);
  const class_usage = inferFieldClassUsage(observations);
  const mismatched_observations = observations.filter(
    (observation) => scoreFieldValue(observation.value, likely_type.name) === 0,
  );
  const mean_confidence =
    (likely_type.confidence +
      likely_multiplicity.confidence +
      class_usage.confidence) /
    3;

  return {
    confidence: Math.round(mean_confidence * 100) / 100,
    conflicting_evidence: buildEvidenceReferences(mismatched_observations),
    evidence_references: buildEvidenceReferences(observations),
    likely_class_usage: {
      classes: class_usage.classes,
    },
    likely_multiplicity,
    likely_type,
    name,
  };
}
207
+
208
/**
 * Project observations onto their source location plus value, sorted with
 * `compareEvidenceReferences`.
 *
 * @param {FieldObservation[]} observations
 * @returns {FieldDiscoveryEvidenceReference[]}
 */
function buildEvidenceReferences(observations) {
  const evidence_references = observations.map(({ origin, value }) => ({
    column: origin.column,
    line: origin.line,
    path: origin.path,
    value,
  }));

  evidence_references.sort(compareEvidenceReferences);

  return evidence_references;
}
222
+
223
/**
 * Guess whether a field holds one value or several per document.
 *
 * A field is `multiple` (confidence 1) as soon as any document carries two
 * distinct values for it. Otherwise it is `single`, with confidence 0.9 when
 * more than one document was observed and 0.8 otherwise.
 *
 * @param {FieldObservation[]} observations
 * @returns {FieldDiscoveryMultiplicitySuggestion}
 */
function inferFieldMultiplicity(observations) {
  /** @type {Map<string, Set<string>>} */
  const distinct_values_by_document = new Map();

  for (const { document_id, value } of observations) {
    let document_values = distinct_values_by_document.get(document_id);

    if (!document_values) {
      document_values = new Set();
      distinct_values_by_document.set(document_id, document_values);
    }

    document_values.add(value);
  }

  let single_valued_document_count = 0;

  for (const document_values of distinct_values_by_document.values()) {
    if (document_values.size > 1) {
      return {
        confidence: 1,
        name: 'multiple',
      };
    }

    single_valued_document_count += 1;
  }

  const seen_in_several_documents =
    distinct_values_by_document.size > 1 && single_valued_document_count > 0;

  return {
    confidence: seen_in_several_documents ? 0.9 : 0.8,
    name: 'single',
  };
}
268
+
269
/**
 * Summarise which classes a field was observed with.
 *
 * Confidence is the share of observations that carried at least one class
 * name, rounded to two decimals. When no observation has a class, the result
 * falls back to `['document']` with confidence 0.2.
 *
 * @param {FieldObservation[]} observations
 * @returns {InferredFieldClassUsage}
 */
function inferFieldClassUsage(observations) {
  const classified_observations = observations.filter(
    (observation) => observation.class_names.size > 0,
  );

  if (classified_observations.length === 0) {
    return {
      confidence: 0.2,
      classes: ['document'],
    };
  }

  /** @type {Set<string>} */
  const seen_class_names = new Set();

  for (const { class_names } of classified_observations) {
    for (const class_name of class_names) {
      seen_class_names.add(class_name);
    }
  }

  const classified_share =
    classified_observations.length / Math.max(observations.length, 1);
  const classes = [...seen_class_names];

  classes.sort((left_class, right_class) =>
    left_class.localeCompare(right_class, 'en'),
  );

  return {
    confidence: Math.round(classified_share * 100) / 100,
    classes,
  };
}
307
+
308
/**
 * Pick the best-scoring type for a set of observations.
 *
 * Every type in `TYPE_NAME_ORDER` is scored; the highest confidence wins and
 * ties go to the type listed first in `TYPE_NAME_ORDER`.
 *
 * @param {FieldObservation[]} observations
 * @returns {FieldDiscoveryTypeSuggestion}
 */
function inferFieldType(observations) {
  /** @type {FieldDiscoveryTypeSuggestion[]} */
  const type_candidates = TYPE_NAME_ORDER.map((type_name) => ({
    confidence: scoreFieldType(observations, type_name),
    name: type_name,
  }));

  // Candidates are already in tie-break order, so a strict ">" scan keeps
  // the earliest type among equally confident ones.
  let best_candidate = type_candidates[0];

  for (const type_candidate of type_candidates) {
    if (type_candidate.confidence > best_candidate.confidence) {
      best_candidate = type_candidate;
    }
  }

  return best_candidate;
}
332
+
333
/**
 * Sort comparator for evidence references: by path, then line, then column,
 * then value. Paths and values are compared with the `en` locale.
 *
 * @param {FieldDiscoveryEvidenceReference} left_reference
 * @param {FieldDiscoveryEvidenceReference} right_reference
 * @returns {number}
 */
function compareEvidenceReferences(left_reference, right_reference) {
  const path_order = left_reference.path.localeCompare(
    right_reference.path,
    'en',
  );

  if (path_order !== 0) {
    return path_order;
  }

  const { column: left_column, line: left_line } = left_reference;
  const { column: right_column, line: right_line } = right_reference;

  return left_line !== right_line
    ? left_line - right_line
    : left_column !== right_column
      ? left_column - right_column
      : left_reference.value.localeCompare(right_reference.value, 'en');
}
358
+
359
/**
 * Average the per-value score of one type over all observations, rounded to
 * two decimals. An empty observation list scores 0.
 *
 * @param {FieldObservation[]} observations
 * @param {DiscoveredFieldTypeName} field_type_name
 * @returns {number}
 */
function scoreFieldType(observations, field_type_name) {
  const observation_count = observations.length;

  if (observation_count === 0) {
    return 0;
  }

  let total_score = 0;

  for (const observation of observations) {
    total_score += scoreFieldValue(observation.value, field_type_name);
  }

  return Math.round((total_score / observation_count) * 100) / 100;
}
377
+
378
/**
 * Score a single value against one type using `FIELD_TYPE_SCORERS`; a type
 * without a scorer scores 0.
 *
 * @param {string} value
 * @param {DiscoveredFieldTypeName} field_type_name
 * @returns {number}
 */
function scoreFieldValue(value, field_type_name) {
  const score_value = FIELD_TYPE_SCORERS[field_type_name];

  if (!score_value) {
    return 0;
  }

  return score_value(value);
}
387
+
388
/**
 * @param {string} value
 * @returns {boolean} Whether the value contains `*`, `?`, `[` or `]`.
 */
function containsGlobSyntax(value) {
  return ['*', '?', '[', ']'].some((glob_character) =>
    value.includes(glob_character),
  );
}

/**
 * Per-type scorers returning how well a single value fits the type: 1 for a
 * match, 0 for none. `string` is a constant 0.5 fallback, and a path-like
 * value that also contains glob syntax scores 0.8 as a path.
 *
 * @type {Record<DiscoveredFieldTypeName, FieldTypeScorer>}
 */
const FIELD_TYPE_SCORERS = {
  date: (value) => Number(DATE_PATTERN.test(value)),
  date_time: (value) => Number(DATE_TIME_PATTERN.test(value)),
  enum: (value) => Number(ENUM_PATTERN.test(value) && !value.includes(' ')),
  glob: (value) => Number(containsGlobSyntax(value)),
  integer: (value) => Number(INTEGER_PATTERN.test(value)),
  path: (value) => {
    // Any value containing "/" counts as path-like, which already covers
    // the "docs/", "lib/" and "test/" prefixes.
    const is_path_like = value.includes('/') || PATH_PATTERN.test(value);

    if (!is_path_like) {
      return 0;
    }

    return containsGlobSyntax(value) ? 0.8 : 1;
  },
  string: () => 0.5,
};
419
+
420
+ /**
421
+ * @typedef {{
422
+ * class_names: Set<string>,
423
+ * document_id: string,
424
+ * name: string,
425
+ * origin: ClaimOrigin,
426
+ * value: string,
427
+ * }} FieldObservation
428
+ */
429
+
430
+ /**
431
+ * @typedef {{
432
+ * name: string,
433
+ * observations: FieldObservation[],
434
+ * }} FieldBucket
435
+ */
@@ -0,0 +1,52 @@
1
+ export type DiscoveredFieldTypeName =
2
+ | 'date'
3
+ | 'date_time'
4
+ | 'enum'
5
+ | 'glob'
6
+ | 'integer'
7
+ | 'path'
8
+ | 'string';
9
+
10
+ export type DiscoveredFieldMultiplicity = 'multiple' | 'single';
11
+
12
+ export interface FieldDiscoveryEvidenceReference {
13
+ column: number;
14
+ line: number;
15
+ path: string;
16
+ value: string;
17
+ }
18
+
19
+ export interface FieldDiscoveryClassUsage {
20
+ classes: string[];
21
+ }
22
+
23
+ export interface FieldDiscoveryTypeSuggestion {
24
+ confidence: number;
25
+ name: DiscoveredFieldTypeName;
26
+ }
27
+
28
+ export interface FieldDiscoveryMultiplicitySuggestion {
29
+ confidence: number;
30
+ name: DiscoveredFieldMultiplicity;
31
+ }
32
+
33
+ export interface FieldDiscoverySuggestion {
34
+ confidence: number;
35
+ conflicting_evidence: FieldDiscoveryEvidenceReference[];
36
+ evidence_references: FieldDiscoveryEvidenceReference[];
37
+ likely_class_usage: FieldDiscoveryClassUsage;
38
+ likely_multiplicity: FieldDiscoveryMultiplicitySuggestion;
39
+ likely_type: FieldDiscoveryTypeSuggestion;
40
+ name: string;
41
+ }
42
+
43
+ export interface FieldDiscoverySummary {
44
+ claim_count: number;
45
+ count: number;
46
+ source_file_count: number;
47
+ }
48
+
49
+ export interface FieldDiscoveryResult {
50
+ fields: FieldDiscoverySuggestion[];
51
+ summary: FieldDiscoverySummary;
52
+ }