dcp-wrap 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +151 -0
- package/dist/cli.js.map +1 -0
- package/dist/decoder.d.ts +35 -0
- package/dist/decoder.js +95 -0
- package/dist/decoder.js.map +1 -0
- package/dist/decoder.test.d.ts +1 -0
- package/dist/decoder.test.js +144 -0
- package/dist/decoder.test.js.map +1 -0
- package/dist/encoder.d.ts +38 -0
- package/dist/encoder.js +105 -0
- package/dist/encoder.js.map +1 -0
- package/dist/generator.d.ts +10 -0
- package/dist/generator.js +232 -0
- package/dist/generator.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/mapping.d.ts +21 -0
- package/dist/mapping.js +90 -0
- package/dist/mapping.js.map +1 -0
- package/dist/picoclaw-hook.d.ts +19 -0
- package/dist/picoclaw-hook.js +189 -0
- package/dist/picoclaw-hook.js.map +1 -0
- package/dist/picoclaw-hook.test.d.ts +1 -0
- package/dist/picoclaw-hook.test.js +200 -0
- package/dist/picoclaw-hook.test.js.map +1 -0
- package/dist/schema.d.ts +29 -0
- package/dist/schema.js +120 -0
- package/dist/schema.js.map +1 -0
- package/dist/types.d.ts +62 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/docs/picoclaw-integration.md +548 -0
- package/package.json +50 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { DcpSchema } from "./schema.js";
|
|
2
|
+
import { FieldMapping } from "./mapping.js";
|
|
3
|
+
import type { EncodedBatch } from "./types.js";
|
|
4
|
+
/** Inline schema for dcpEncode — no files, no generator. */
|
|
5
|
+
export interface InlineSchema {
|
|
6
|
+
/** Schema identifier; dcpEncode emits it as the second element of the `["$S", id, ...fields]` header row. */
id: string;
|
|
7
|
+
/** Ordered field names; dcpEncode uses them both as header columns and as the keys read from each record. */
fields: string[];
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* One-step DCP encode. No schema file, no generator, no mapping.
|
|
11
|
+
* For known structures where fields match source keys directly.
|
|
12
|
+
*
|
|
13
|
+
* Arrays are auto-joined with comma (an array that joins to an empty string, e.g. [], becomes "-"). Use transform for custom handling.
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```ts
|
|
17
|
+
* const dcp = dcpEncode(results, {
|
|
18
|
+
* id: "engram-recall:v1",
|
|
19
|
+
* fields: ["id", "relevance", "summary", "tags", "hitCount", "weight", "status"],
|
|
20
|
+
* });
|
|
21
|
+
* // → header + rows as newline-separated string
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
export declare function dcpEncode(records: Record<string, unknown>[], schema: InlineSchema, options?: {
|
|
25
|
+
transform?: Record<string, (v: unknown) => unknown>;
|
|
26
|
+
}): string;
|
|
27
|
+
export declare class DcpEncoder {
|
|
28
|
+
private readonly schema;
|
|
29
|
+
private readonly mapping;
|
|
30
|
+
constructor(schema: DcpSchema, mapping: FieldMapping);
|
|
31
|
+
/** Encode a batch of records into DCP format. */
|
|
32
|
+
encode(records: Record<string, unknown>[]): EncodedBatch;
|
|
33
|
+
/** Encode a single record, returning just the positional array. */
|
|
34
|
+
encodeOne(record: Record<string, unknown>): unknown[];
|
|
35
|
+
/** Render encoded batch as a string (header + rows, newline-separated). */
|
|
36
|
+
static toString(batch: EncodedBatch): string;
|
|
37
|
+
private detectMask;
|
|
38
|
+
}
|
package/dist/encoder.js
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
 * One-step DCP encode. No schema file, no generator, no mapping.
 * For known structures where fields match source keys directly.
 *
 * The output is one JSON array per line: a `["$S", id, ...fields]` header
 * followed by one positional row per record.
 *
 * Per field, in order of precedence:
 *  - a missing, `null` or `undefined` value is normalised to `null`;
 *  - if `options.transform` has its OWN entry for the field, the row gets
 *    whatever that function returns for the normalised value;
 *  - arrays are joined with "," (a join that yields "" becomes "-");
 *  - anything else is emitted unchanged.
 *
 * @param records - source objects, read by the names listed in `schema.fields`
 * @param schema - inline schema: header `id` plus ordered `fields`
 * @param options - optional `transform` map of per-field converter functions
 * @returns header and rows joined with "\n", or "" when `records` is empty
 *
 * @example
 * ```ts
 * const dcp = dcpEncode(results, {
 *   id: "engram-recall:v1",
 *   fields: ["id", "relevance", "summary", "tags", "hitCount", "weight", "status"],
 * });
 * // → header + rows as newline-separated string
 * ```
 */
export function dcpEncode(records, schema, options) {
    if (records.length === 0)
        return "";
    const { id, fields } = schema;
    const transforms = options?.transform ?? {};
    // Only transforms the caller actually supplied count. A plain `transforms[f]`
    // lookup would also find Object.prototype members ("toString", "valueOf",
    // "constructor", ...) and call them as if they were user transforms.
    const hasTransform = (field) => Object.prototype.hasOwnProperty.call(transforms, field) && Boolean(transforms[field]);
    const header = JSON.stringify(["$S", id, ...fields]);
    const rows = records.map((record) => {
        const row = fields.map((f) => {
            const raw = record[f] ?? null;
            if (hasTransform(f)) {
                // Called as a member so `this` stays bound to the transform map.
                return transforms[f](raw);
            }
            if (Array.isArray(raw)) {
                // "-" stands in for an array whose join is the empty string.
                return raw.join(",") || "-";
            }
            return raw;
        });
        return JSON.stringify(row);
    });
    return [header, ...rows].join("\n");
}
|
|
37
|
+
/**
 * Encodes records into DCP batches using a schema and a field mapping.
 *
 * Each record is first passed through `mapping.resolve`; the schema then
 * supplies the field order, the `$S` header and the (possibly cut-down) id.
 */
export class DcpEncoder {
    schema;
    mapping;
    constructor(schema, mapping) {
        this.schema = schema;
        this.mapping = mapping;
    }
    /**
     * Encode a batch of records into DCP format.
     *
     * Only fields that are non-null in at least one record are emitted. When
     * there are no records, or no field is present anywhere, the result has an
     * empty header, no rows, mask 0 and the schema's own id.
     */
    encode(records) {
        let resolvedBatch = [];
        let mask = 0;
        if (records.length > 0) {
            resolvedBatch = records.map((record) => this.mapping.resolve(record));
            mask = this.detectMask(resolvedBatch);
        }
        if (mask === 0) {
            // Nothing to encode: either no input or every mapped value was null.
            return {
                header: "",
                rows: [],
                schemaId: this.schema.id,
                mask: 0,
                isCutdown: false,
            };
        }
        // A mask that differs from the schema's full mask means some fields were dropped.
        const isCutdown = this.schema.fullMask !== mask;
        const activeFields = this.schema.fieldsFromMask(mask);
        const schemaId = this.schema.cutdownId(mask);
        const header = JSON.stringify(this.schema.sHeader(mask));
        const rows = [];
        for (const resolved of resolvedBatch) {
            const cells = activeFields.map((name) => {
                const value = resolved[name];
                return value === undefined || value === null ? null : value;
            });
            rows.push(JSON.stringify(cells));
        }
        return { header, rows, schemaId, mask, isCutdown };
    }
    /** Encode a single record, returning just the positional array (all schema fields, nulls included). */
    encodeOne(record) {
        const resolved = this.mapping.resolve(record);
        return this.schema.fields.map((name) => {
            const value = resolved[name];
            return value === undefined || value === null ? null : value;
        });
    }
    /** Render encoded batch as a string (header + rows, newline-separated); "" when the header is empty. */
    static toString(batch) {
        return batch.header ? [batch.header, ...batch.rows].join("\n") : "";
    }
    /**
     * Build the presence bitmask for a resolved batch: field `i` sets bit
     * `fieldCount - 1 - i` as soon as any record has a non-null value for it,
     * so the first schema field occupies the highest bit.
     *
     * NOTE(review): `<<` and `|=` operate on 32-bit integers, so this would
     * misbehave for schemas with more than 31 fields — confirm DcpSchema caps
     * the field count.
     */
    detectMask(resolvedBatch) {
        const fieldCount = this.schema.fieldCount;
        let mask = 0;
        for (const resolved of resolvedBatch) {
            for (const [index, name] of this.schema.fields.entries()) {
                const value = resolved[name];
                if (value !== undefined && value !== null) {
                    mask |= 1 << (fieldCount - 1 - index);
                }
            }
        }
        return mask;
    }
}
|
|
105
|
+
//# sourceMappingURL=encoder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"encoder.js","sourceRoot":"","sources":["../src/encoder.ts"],"names":[],"mappings":"AAUA;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,SAAS,CACvB,OAAkC,EAClC,MAAoB,EACpB,OAAiE;IAEjE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEpC,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IAC9B,MAAM,UAAU,GAAG,OAAO,EAAE,SAAS,IAAI,EAAE,CAAC;IAE5C,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,EAAE,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC;IACrD,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;QAClC,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YAC3B,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;YAC9B,IAAI,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;gBAClB,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC5B,CAAC;YACD,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;gBACvB,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC;YAC9B,CAAC;YACD,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QACH,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACtC,CAAC;AAED,MAAM,OAAO,UAAU;IACJ,MAAM,CAAY;IAClB,OAAO,CAAe;IAEvC,YAAY,MAAiB,EAAE,OAAqB;QAClD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,iDAAiD;IACjD,MAAM,CAAC,OAAkC;QACvC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,OAAO;gBACL,MAAM,EAAE,EAAE;gBACV,IAAI,EAAE,EAAE;gBACR,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE;gBACxB,IAAI,EAAE,CAAC;gBACP,SAAS,EAAE,KAAK;aACjB,CAAC;QACJ,CAAC;QAED,uBAAuB;QACvB,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAElE,6BAA6B;QAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC;QAC5C,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,EAAE;gBACV,IAAI,EAAE,EAAE;gBACR,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE;gBACxB,IAAI,EAAE,CAAC;gBACP,SAAS,EAAE,KAAK;aACjB,CAAC;QACJ,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,KAAK,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC;QAChD,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QACtD,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC
;QAE7C,kBAAkB;QAClB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;QAEzC,aAAa;QACb,MAAM,IAAI,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE;YAC1C,MAAM,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;YACzD,OAAO,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IACrD,CAAC;IAED,mEAAmE;IACnE,SAAS,CAAC,MAA+B;QACvC,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAC9C,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;IAC5D,CAAC;IAED,2EAA2E;IAC3E,MAAM,CAAC,QAAQ,CAAC,KAAmB;QACjC,IAAI,CAAC,KAAK,CAAC,MAAM;YAAE,OAAO,EAAE,CAAC;QAC7B,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClD,CAAC;IAEO,UAAU,CAAC,aAAwC;QACzD,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC;QAClC,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;YACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACnD,IAAI,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;oBAC5C,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC5B,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;CACF"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { SchemaDraft, GenerateOptions } from "./types.js";
|
|
2
|
+
export declare class SchemaGenerator {
|
|
3
|
+
/**
|
|
4
|
+
* Generate a schema draft from JSON samples.
|
|
5
|
+
* Infers field types, enums, ordering, and mapping.
|
|
6
|
+
*/
|
|
7
|
+
fromSamples(samples: Record<string, unknown>[], options: GenerateOptions): SchemaDraft;
|
|
8
|
+
}
|
|
9
|
+
/** Format a SchemaDraft as a human-readable report string. */
|
|
10
|
+
export declare function formatReport(draft: SchemaDraft): string;
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import { flattenKeys } from "./mapping.js";
|
|
2
|
+
// ── Field ordering heuristics ──────────────────────────────
|
|
3
|
+
// Sort rank per field category: the position in this list is the rank, and
// lower ranks are placed earlier in a generated schema.
const CATEGORY_ORDER = Object.fromEntries(
    ["identifier", "classifier", "numeric", "text", "other"].map((category, rank) => [category, rank]),
);
// Lower-cased field names that classify a field as an "identifier".
const IDENTIFIER_HINTS = new Set([
    "id",
    "source",
    "name",
    "path",
    "endpoint",
    "url",
    "uri",
    "key",
    "file",
    "file_path",
    "doc",
    "document",
    "chunk_id",
    "node_id",
]);
// Lower-cased field names that classify a field as a "classifier".
const CLASSIFIER_HINTS = new Set([
    "status",
    "level",
    "type",
    "action",
    "method",
    "kind",
    "category",
    "state",
    "trigger",
    "mode",
    "role",
    "domain",
]);
// Lower-cased field names that classify a field as "numeric".
const NUMERIC_HINTS = new Set([
    "score",
    "count",
    "weight",
    "latency",
    "page",
    "rank",
    "index",
    "chunk_index",
    "position",
    "size",
    "duration",
    "confidence",
    "distance",
    "similarity",
    "uptime",
    "hit_count",
]);
|
|
23
|
+
/**
 * Assign a field to one of the categories "identifier", "classifier",
 * "numeric", "text" or "other".
 *
 * The lower-cased field name is checked against the hint sets first
 * (identifier, then classifier, then numeric). Otherwise the non-null values
 * decide: all numbers → "numeric"; all strings longer than 50 characters →
 * "text"; all strings with fewer than 30% distinct values → "classifier".
 * Everything else, including a column with no non-null values, is "other".
 *
 * @param name - schema field name
 * @param values - sampled values for the field (may contain null/undefined)
 * @returns the category name
 */
function classifyField(name, values) {
    const key = name.toLowerCase();
    const nameRules = [
        { hints: IDENTIFIER_HINTS, category: "identifier" },
        { hints: CLASSIFIER_HINTS, category: "classifier" },
        { hints: NUMERIC_HINTS, category: "numeric" },
    ];
    const matched = nameRules.find((rule) => rule.hints.has(key));
    if (matched) {
        return matched.category;
    }
    const present = values.filter((v) => v !== null && v !== undefined);
    if (present.length === 0) {
        return "other";
    }
    const allOfType = (typeName) => present.every((v) => typeof v === typeName);
    if (allOfType("number")) {
        return "numeric";
    }
    if (allOfType("string")) {
        // Long free-form strings everywhere → prose-like text.
        if (present.every((v) => v.length > 50)) {
            return "text";
        }
        // Heavily repeated short strings behave like a label/enum.
        const distinctShare = new Set(present).size / present.length;
        if (distinctShare < 0.3) {
            return "classifier";
        }
    }
    return "other";
}
|
|
46
|
+
/**
 * Infer a type descriptor for one column of sampled values.
 *
 * - `type` is "boolean", "number" or "string" (any other runtime type is
 *   reported as "string"), or a sorted array of those when the column is
 *   mixed; "null" is appended when the column contains null/undefined. A
 *   column with no non-null values is `{ type: "null" }`.
 * - `enum` is added for pure string columns with 2–10 distinct values that
 *   make up at most 60% of the non-null count.
 * - For pure number columns, `min: 0, max: 1` is added when every value lies
 *   in [0, 1], and `min: 0` alone when every value is non-negative.
 *
 * @param values - sampled values for one field (may contain null/undefined)
 * @returns the inferred type descriptor
 */
function inferType(values) {
    const nonNull = values.filter((v) => v != null);
    const hasNull = nonNull.length < values.length;
    if (nonNull.length === 0) {
        return { type: "null" };
    }
    const typeSet = new Set();
    for (const v of nonNull) {
        if (typeof v === "boolean")
            typeSet.add("boolean");
        else if (typeof v === "number")
            typeSet.add("number");
        else
            typeSet.add("string"); // fallback
    }
    const types = [...typeSet].sort();
    if (hasNull)
        types.push("null");
    const result = {
        type: types.length === 1 ? types[0] : types,
    };
    // Enum detection
    if (typeSet.has("string") && typeSet.size === 1) {
        const unique = [...new Set(nonNull)].sort();
        if (unique.length >= 2 && unique.length <= 10 && unique.length <= nonNull.length * 0.6) {
            result.enum = unique;
        }
    }
    // Numeric range detection
    if (typeSet.has("number") && typeSet.size === 1) {
        // Fold instead of Math.min(...nums): spreading a large column passes
        // every value as a call argument and throws RangeError once the
        // engine's argument limit is exceeded. Math.min/Math.max are still
        // used pairwise so a NaN in the column propagates as before.
        let lo = Infinity;
        let hi = -Infinity;
        for (const num of nonNull) {
            lo = Math.min(lo, num);
            hi = Math.max(hi, num);
        }
        if (lo >= 0 && hi <= 1) {
            result.min = 0;
            result.max = 1;
        }
        else if (lo >= 0) {
            result.min = 0;
        }
    }
    return result;
}
|
|
89
|
+
// ── SchemaGenerator ────────────────────────────────────────
|
|
90
|
+
export class SchemaGenerator {
    /**
     * Generate a schema draft from JSON samples.
     * Infers field types, enums, ordering, and mapping.
     *
     * Pipeline: flatten every sample into dot-notation paths, collect one
     * value column per path (one entry per sample, null where the path is
     * absent), analyse each column, drop rarely-present fields, sort by
     * category / presence / name, cap the field count, make schema names
     * unique, then assemble the schema, the mapping and a per-field report.
     *
     * @param samples - source objects to learn from; at least one is required
     * @param options - `domain` plus optional `version` (default 1),
     *   `description` (default ""), `exclude` / `include` path filters,
     *   `fieldNames` (source path → schema name), `maxDepth` (default 3),
     *   `maxFields` (default 20) and `minPresence` (default 0.1)
     * @returns `{ schema, mapping, fieldReports }`; the schema id is
     *   `${domain}:v${version}`
     * @throws Error when `samples` is empty, when no path survives the
     *   include/exclude filters, or when no field reaches `minPresence`
     */
    fromSamples(samples, options) {
        if (samples.length === 0) {
            throw new Error("need at least 1 sample");
        }
        const { domain, version = 1, description = "" } = options;
        const excludeSet = new Set(options.exclude ?? []);
        const includeSet = options.include ? new Set(options.include) : null;
        const fieldNames = options.fieldNames ?? {};
        const maxDepth = options.maxDepth ?? 3;
        const maxFields = options.maxFields ?? 20;
        const minPresence = options.minPresence ?? 0.1;
        // Step 1: Flatten and collect per-path values
        const pathValues = new Map();
        for (let sampleIndex = 0; sampleIndex < samples.length; sampleIndex++) {
            const flat = flattenKeys(samples[sampleIndex], "", maxDepth);
            const seenPaths = new Set();
            for (const [path, value] of Object.entries(flat)) {
                if (excludeSet.has(path))
                    continue;
                if (includeSet && !includeSet.has(path))
                    continue;
                let column = pathValues.get(path);
                if (column === undefined) {
                    // A path first seen in a later sample was absent from all
                    // earlier ones: back-fill those slots with null so every
                    // column has one entry per sample and presence rates are
                    // measured against the full sample count.
                    column = new Array(sampleIndex).fill(null);
                    pathValues.set(path, column);
                }
                column.push(value);
                seenPaths.add(path);
            }
            // Mark paths missing from this sample as null
            for (const [path, column] of pathValues) {
                if (!seenPaths.has(path)) {
                    column.push(null);
                }
            }
        }
        if (pathValues.size === 0) {
            throw new Error("no fields found in samples after filtering");
        }
        // Step 2: Analyze each field (presence is computed once and reused below)
        let analyzed = [];
        for (const [sourcePath, values] of pathValues) {
            const schemaName = fieldNames[sourcePath] ?? sourcePath.split(".").pop();
            const category = classifyField(schemaName, values);
            const typeInfo = inferType(values);
            const presence = values.filter((v) => v != null).length / values.length;
            analyzed.push({ schemaName, sourcePath, category, typeInfo, values, presence });
        }
        // Step 2.5: Drop low-presence fields (appear in < minPresence of samples)
        analyzed = analyzed.filter((f) => f.presence >= minPresence);
        if (analyzed.length === 0) {
            throw new Error("no fields survive presence filter");
        }
        // Step 3: Sort by DCP convention: category rank, then presence
        // (descending), then schema name.
        analyzed.sort((a, b) => {
            const catA = CATEGORY_ORDER[a.category] ?? 99;
            const catB = CATEGORY_ORDER[b.category] ?? 99;
            if (catA !== catB)
                return catA - catB;
            if (a.presence !== b.presence)
                return b.presence - a.presence; // descending
            return a.schemaName.localeCompare(b.schemaName);
        });
        // Step 3.5: Cap at maxFields (keep highest-priority fields)
        if (analyzed.length > maxFields) {
            analyzed = analyzed.slice(0, maxFields);
        }
        // Step 4: Deduplicate field names. The first holder keeps the plain
        // name; later ones get `_1`, `_2`, ... The suffix counter is tracked
        // per ORIGINAL name and every candidate is checked against the names
        // already taken, so three or more duplicates (or a clash with a field
        // literally called `name_1`) can no longer produce the same name twice.
        const takenNames = new Set();
        const nextSuffix = new Map();
        for (const field of analyzed) {
            const baseName = field.schemaName;
            let candidate = baseName;
            let suffix = nextSuffix.get(baseName) ?? 1;
            while (takenNames.has(candidate)) {
                candidate = `${baseName}_${suffix}`;
                suffix += 1;
            }
            nextSuffix.set(baseName, suffix);
            takenNames.add(candidate);
            field.schemaName = candidate;
        }
        // Step 5: Build schema and mapping
        const schemaId = `${domain}:v${version}`;
        const fields = analyzed.map((f) => f.schemaName);
        const types = {};
        const paths = {};
        const fieldReports = [];
        for (const f of analyzed) {
            types[f.schemaName] = f.typeInfo;
            paths[f.schemaName] = f.sourcePath;
            const nonNull = f.values.filter((v) => v != null);
            // Distinctness is judged on the string form of each value.
            const uniqueCount = new Set(nonNull.map(String)).size;
            const presenceRate = nonNull.length / f.values.length;
            const repetitionRate = nonNull.length > 0 ? 1 - uniqueCount / nonNull.length : 0;
            fieldReports.push({
                name: f.schemaName,
                sourcePath: f.sourcePath,
                category: f.category,
                inferredType: f.typeInfo,
                presenceRate,
                uniqueCount,
                sampleCount: f.values.length,
                isGroupKeyCandidate: repetitionRate > 0.3 &&
                    (f.category === "identifier" || f.category === "classifier"),
            });
        }
        const schema = {
            $dcp: "schema",
            id: schemaId,
            description,
            fields,
            fieldCount: fields.length,
            types,
        };
        const mapping = { schemaId, paths };
        return { schema, mapping, fieldReports };
    }
}
|
|
209
|
+
/**
 * Format a SchemaDraft as a human-readable report string.
 *
 * The report starts with the schema id, the field count and a blank line,
 * followed by one line per field report giving its name, inferred type
 * (union members joined with "|"), source path, unique/sample counts and any
 * flags (group-key candidate, enum size, nullable with presence percentage).
 *
 * @param draft - draft holding `schema` and `fieldReports`
 * @returns the report, lines joined with "\n"
 */
export function formatReport(draft) {
    const describeField = (fr) => {
        const { inferredType } = fr;
        const typeLabel = Array.isArray(inferredType.type)
            ? inferredType.type.join("|")
            : inferredType.type;
        const flags = [];
        if (fr.isGroupKeyCandidate) {
            flags.push("group_key candidate");
        }
        if (inferredType.enum) {
            flags.push(`enum(${inferredType.enum.length})`);
        }
        if (fr.presenceRate < 1.0) {
            flags.push(`nullable(${Math.round(fr.presenceRate * 100)}%)`);
        }
        let flagStr = "";
        if (flags.length > 0) {
            flagStr = ` [${flags.join(", ")}]`;
        }
        return ` ${fr.name}: ${typeLabel} (source: ${fr.sourcePath}, unique: ${fr.uniqueCount}/${fr.sampleCount})${flagStr}`;
    };
    const lines = [
        `Schema: ${draft.schema.id}`,
        `Fields: ${draft.schema.fields.length}`,
        "",
    ];
    for (const report of draft.fieldReports) {
        lines.push(describeField(report));
    }
    return lines.join("\n");
}
|
|
232
|
+
//# sourceMappingURL=generator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generator.js","sourceRoot":"","sources":["../src/generator.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAE3C,8DAA8D;AAE9D,MAAM,cAAc,GAA2B;IAC7C,UAAU,EAAE,CAAC;IACb,UAAU,EAAE,CAAC;IACb,OAAO,EAAE,CAAC;IACV,IAAI,EAAE,CAAC;IACP,KAAK,EAAE,CAAC;CACT,CAAC;AAEF,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC;IAC/B,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK;IAC/D,MAAM,EAAE,WAAW,EAAE,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,SAAS;CAC9D,CAAC,CAAC;AAEH,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC;IAC/B,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU;IACjE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ;CAC7C,CAAC,CAAC;AAEH,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC;IAC5B,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IAC9D,aAAa,EAAE,UAAU,EAAE,MAAM,EAAE,UAAU,EAAE,YAAY;IAC3D,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,WAAW;CAChD,CAAC,CAAC;AAIH,SAAS,aAAa,CAAC,IAAY,EAAE,MAAiB;IACpD,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACjC,IAAI,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC;QAAE,OAAO,YAAY,CAAC;IACrD,IAAI,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC;QAAE,OAAO,YAAY,CAAC;IACrD,IAAI,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC;IAE/C,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;IAChD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,EAAE,CAAC;QACtE,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IACE,OAAO,CAAC,MAAM,GAAG,CAAC;QAClB,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAK,CAAY,CAAC,MAAM,GAAG,EAAE,CAAC,EACxE,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IACD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,EAAE,CAAC;QACtE,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC;QAC3D,IAAI,WAAW,GAAG,GAAG;YAAE,OAAO,YAAY,CAAC;IAC7C,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,SAAS,CAAC,MAAiB;IAClC,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG
,OAAO,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;IAE/C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAC1B,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,OAAO,CAAC,KAAK,SAAS;YAAE,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;aAC9C,IAAI,OAAO,CAAC,KAAK,QAAQ;YAAE,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;;YACjD,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW;IACzC,CAAC;IAED,MAAM,KAAK,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;IAClC,IAAI,OAAO;QAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhC,MAAM,MAAM,GAAiB;QAC3B,IAAI,EAAE,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK;KAC5C,CAAC;IAEF,iBAAiB;IACjB,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QAChD,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,OAAmB,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACxD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,MAAM,CAAC,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACvF,MAAM,CAAC,IAAI,GAAG,MAAM,CAAC;QACvB,CAAC;IACH,CAAC;IAED,0BAA0B;IAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QAChD,MAAM,IAAI,GAAG,OAAmB,CAAC;QACjC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;QAC7B,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;QAC7B,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;YACvB,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC;YACf,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC;QACjB,CAAC;aAAM,IAAI,EAAE,IAAI,CAAC,EAAE,CAAC;YACnB,MAAM,CAAC,GAAG,GAAG,CAAC,CAAC;QACjB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAYD,8DAA8D;AAE9D,MAAM,OAAO,eAAe;IAC1B;;;OAGG;IACH,WAAW,CACT,OAAkC,EAClC,OAAwB;QAExB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,EAAE,MAAM,EAAE,OAAO,GAAG,CAAC,EAAE,WAAW,GAAG,EAAE,EAAE,GAAG,OAAO,CAAC;QAC1D,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;QAClD,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACrE,MAAM,UAAU,GAAG,OA
AO,CAAC,UAAU,IAAI,EAAE,CAAC;QAC5C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,CAAC,CAAC;QACvC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,EAAE,CAAC;QAC1C,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,GAAG,CAAC;QAE/C,8CAA8C;QAC9C,MAAM,UAAU,GAAG,IAAI,GAAG,EAAqB,CAAC;QAEhD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,EAAE,EAAE,EAAE,QAAQ,CAAC,CAAC;YAC/C,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;YAEpC,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBACjD,IAAI,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC;oBAAE,SAAS;gBACnC,IAAI,UAAU,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC;oBAAE,SAAS;gBAClD,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC;oBAAE,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBACpD,UAAU,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBAClC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACtB,CAAC;YAED,6BAA6B;YAC7B,KAAK,MAAM,IAAI,IAAI,UAAU,CAAC,IAAI,EAAE,EAAE,CAAC;gBACrC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,UAAU,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACnC,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,UAAU,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;QAChE,CAAC;QAED,6BAA6B;QAC7B,IAAI,QAAQ,GAAoB,EAAE,CAAC;QAEnC,KAAK,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC9C,MAAM,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAG,CAAC;YAC1E,MAAM,QAAQ,GAAG,aAAa,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;YACnD,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;YACnC,QAAQ,CAAC,IAAI,CAAC,EAAE,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC;QACxE,CAAC;QAED,0EAA0E;QAC1E,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;YAC/B,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;YAC5E,OAAO,QAAQ,IAAI,WAAW,CAAC;QACjC,CAAC,CAAC,CAAC;QAEH,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;QACvD,CAAC;QAED,iCAAiC;QACjC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YACrB,MAAM,IAAI,GAAG,cAAc,CAAC,CAAC
,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC9C,MAAM,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC9C,IAAI,IAAI,KAAK,IAAI;gBAAE,OAAO,IAAI,GAAG,IAAI,CAAC;YAEtC,MAAM,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;YACzE,MAAM,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;YACzE,IAAI,KAAK,KAAK,KAAK;gBAAE,OAAO,KAAK,GAAG,KAAK,CAAC,CAAC,aAAa;YAExD,OAAO,CAAC,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,4DAA4D;QAC5D,IAAI,QAAQ,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;YAChC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAC1C,CAAC;QAED,kCAAkC;QAClC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC5C,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;YACnD,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;gBACd,KAAK,CAAC,UAAU,GAAG,GAAG,KAAK,CAAC,UAAU,IAAI,KAAK,EAAE,CAAC;YACpD,CAAC;YACD,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,UAAU,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;QAC7C,CAAC;QAED,mCAAmC;QACnC,MAAM,QAAQ,GAAG,GAAG,MAAM,KAAK,OAAO,EAAE,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,KAAK,GAAiC,EAAE,CAAC;QAC/C,MAAM,KAAK,GAA2B,EAAE,CAAC;QACzC,MAAM,YAAY,GAAkB,EAAE,CAAC;QAEvC,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;YACzB,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACjC,KAAK,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC;YAEnC,MAAM,OAAO,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;YAClD,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;YACtD,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;YACtD,MAAM,cAAc,GAClB,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YAE5D,YAAY,CAAC,IAAI,CAAC;gBAChB,IAAI,EAAE,CAAC,CAAC,UAAU;gBAClB,UAAU,EAAE,CAAC,CAAC,UAAU;gBACxB,QAAQ,EAAE,CAAC,CAAC,QAAQ;
gBACpB,YAAY,EAAE,CAAC,CAAC,QAAQ;gBACxB,YAAY;gBACZ,WAAW;gBACX,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,MAAM;gBAC5B,mBAAmB,EACjB,cAAc,GAAG,GAAG;oBACpB,CAAC,CAAC,CAAC,QAAQ,KAAK,YAAY,IAAI,CAAC,CAAC,QAAQ,KAAK,YAAY,CAAC;aAC/D,CAAC,CAAC;QACL,CAAC;QAED,MAAM,MAAM,GAAiB;YAC3B,IAAI,EAAE,QAAQ;YACd,EAAE,EAAE,QAAQ;YACZ,WAAW;YACX,MAAM;YACN,UAAU,EAAE,MAAM,CAAC,MAAM;YACzB,KAAK;SACN,CAAC;QAEF,MAAM,OAAO,GAAoB,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC;QAErD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;IAC3C,CAAC;CACF;AAED,8DAA8D;AAC9D,MAAM,UAAU,YAAY,CAAC,KAAkB;IAC7C,MAAM,KAAK,GAAG;QACZ,WAAW,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE;QAC5B,WAAW,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,EAAE;QACvC,EAAE;KACH,CAAC;IAEF,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;QACpC,MAAM,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC;YAC3C,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC;YAChC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC;QACzB,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,IAAI,EAAE,CAAC,mBAAmB;YAAE,KAAK,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAC9D,IAAI,EAAE,CAAC,YAAY,CAAC,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;QAC7E,IAAI,EAAE,CAAC,YAAY,GAAG,GAAG;YACvB,KAAK,CAAC,IAAI,CAAC,YAAY,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;QAChE,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QAClE,KAAK,CAAC,IAAI,CACR,KAAK,EAAE,CAAC,IAAI,KAAK,CAAC,aAAa,EAAE,CAAC,UAAU,aAAa,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,WAAW,IAAI,OAAO,EAAE,CACvG,CAAC;IACJ,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export { DcpSchema } from "./schema.js";
|
|
2
|
+
export { FieldMapping, resolvePath, flattenKeys } from "./mapping.js";
|
|
3
|
+
export { SchemaGenerator, formatReport } from "./generator.js";
|
|
4
|
+
export { DcpEncoder, dcpEncode } from "./encoder.js";
|
|
5
|
+
export { DcpDecoder } from "./decoder.js";
|
|
6
|
+
export type { InlineSchema } from "./encoder.js";
|
|
7
|
+
export type { DecodeResult, TemplateMap } from "./decoder.js";
|
|
8
|
+
export type { DcpSchemaDef, FieldTypeDef, FieldMappingDef, GenerateOptions, EncodedBatch, SchemaDraft, FieldReport, } from "./types.js";
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
export { DcpSchema } from "./schema.js";
|
|
2
|
+
export { FieldMapping, resolvePath, flattenKeys } from "./mapping.js";
|
|
3
|
+
export { SchemaGenerator, formatReport } from "./generator.js";
|
|
4
|
+
export { DcpEncoder, dcpEncode } from "./encoder.js";
|
|
5
|
+
export { DcpDecoder } from "./decoder.js";
|
|
6
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AACtE,OAAO,EAAE,eAAe,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC/D,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { FieldMappingDef } from "./types.js";
|
|
2
|
+
/** Flatten nested object into dot-notation keys with leaf values.
* Plain nested objects are expanded; arrays and null are kept as leaf values.
* @param obj - object to flatten.
* @param prefix - prepended, followed by ".", to every produced key (default: "").
|
|
3
|
+
* @param maxDepth - max nesting depth (default: unlimited). 0 = top-level only.
* @returns a new flat object keyed by dot-notation path.
|
|
4
|
+
*/
|
|
5
|
+
export declare function flattenKeys(obj: Record<string, unknown>, prefix?: string, maxDepth?: number): Record<string, unknown>;
|
|
6
|
+
/** Resolve a dot-notation path against an object. Returns undefined if missing.
* Also returns undefined when an intermediate value is null, undefined or not an
* object, and when the resolved value itself is null.
* @param obj - value to start from.
* @param path - property names separated by ".".
*/
|
|
7
|
+
export declare function resolvePath(obj: unknown, path: string): unknown;
|
|
8
|
+
/** Maps schema field names to dot-notation paths inside a source object. */
export declare class FieldMapping {
|
|
9
|
+
/** Schema identifier taken from the definition passed to the constructor. */
readonly schemaId: string;
|
|
10
|
+
/** Field name -> dot-notation source path. */
readonly paths: Record<string, string>;
|
|
11
|
+
/** Build a mapping from a definition; the definition's paths are shallow-copied. */
constructor(def: FieldMappingDef);
|
|
12
|
+
/** Resolve all mapped fields from a source object. */
|
|
13
|
+
resolve(source: Record<string, unknown>): Record<string, unknown>;
|
|
14
|
+
/** Resolve fields in schema order, returning a positional array. */
/* Values that resolve to null or undefined are emitted as null. */
|
|
15
|
+
resolveToRow(source: Record<string, unknown>, fields: string[]): unknown[];
|
|
16
|
+
/** Return a new FieldMapping with some paths overridden. */
|
|
17
|
+
withOverrides(overrides: Record<string, string>): FieldMapping;
|
|
18
|
+
/** Auto-bind schema fields to source paths by name matching. */
/* Order of precedence per field: an explicit override, then a top-level key of
 * the sample holding a non-object value (or null), then a nested path whose last
 * segment equals the field name (the shortest path string wins when several match).
 * Fields with no match are left out of the mapping. */
|
|
19
|
+
static autoBind(schemaId: string, fields: string[], sample: Record<string, unknown>, overrides?: Record<string, string>): FieldMapping;
|
|
20
|
+
/** Return a plain definition (schemaId plus a shallow copy of paths). */
toDef(): FieldMappingDef;
|
|
21
|
+
}
|
package/dist/mapping.js
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
 * Flatten a nested object into dot-notation keys with leaf values.
 *
 * Plain nested objects are expanded recursively. Arrays and `null` are treated
 * as leaves and copied through unchanged.
 *
 * @param obj - object to flatten.
 * @param prefix - prepended, followed by ".", to every produced key. A non-empty
 *   prefix counts as already-consumed depth: one level per "."-separated segment.
 * @param maxDepth - max nesting depth (default: unlimited). 0 = top-level only.
 * @returns a new flat object keyed by dot-notation path.
 */
export function flattenKeys(obj, prefix = "", maxDepth = Infinity) {
    const result = {};
    // Depth contributed by a caller-supplied prefix (kept for backward compatibility).
    const startDepth = prefix ? prefix.split(".").length : 0;
    // Depth is carried explicitly through the recursion instead of being re-derived
    // from the accumulated key, so keys that themselves contain "." do not inflate
    // the depth and cut the traversal short.
    const walk = (node, path, depth) => {
        for (const [key, value] of Object.entries(node)) {
            const fullKey = path ? `${path}.${key}` : key;
            const isNestedObject = value !== null && typeof value === "object" && !Array.isArray(value);
            if (isNestedObject && depth < maxDepth) {
                walk(value, fullKey, depth + 1);
            }
            else {
                result[fullKey] = value;
            }
        }
    };
    walk(obj, prefix, startDepth);
    return result;
}
|
|
21
|
+
/**
 * Resolve a dot-notation path against an object.
 *
 * Only own properties are followed, so names that exist solely on the prototype
 * chain (e.g. "toString", "constructor") count as missing.
 *
 * @param obj - value to start from.
 * @param path - property names separated by ".".
 * @returns the value at the path, or undefined when a segment is missing, an
 *   intermediate value is null/undefined/not an object, or the value is null.
 */
export function resolvePath(obj, path) {
    let current = obj;
    for (const segment of path.split(".")) {
        if (current === null || current === undefined || typeof current !== "object") {
            return undefined;
        }
        // Ignore inherited members: a missing key must not resolve to a
        // function borrowed from Object.prototype / Array.prototype.
        if (!Object.prototype.hasOwnProperty.call(current, segment)) {
            return undefined;
        }
        current = current[segment];
    }
    // Normalise a null leaf to undefined so callers see a single "missing" value.
    return current ?? undefined;
}
|
|
32
|
+
/**
 * Maps schema field names to dot-notation paths inside a source object.
 *
 * All key lookups use own-property checks, so field names that collide with
 * Object.prototype members ("constructor", "toString", ...) are handled like
 * any other name.
 */
export class FieldMapping {
    /** Schema identifier copied from the definition. */
    schemaId;
    /** Field name -> dot-notation source path. */
    paths;
    /** @param def - definition providing `schemaId` and `paths`. */
    constructor(def) {
        this.schemaId = def.schemaId;
        // Shallow copy so later changes to def.paths do not affect this instance.
        this.paths = { ...def.paths };
    }
    /** Resolve all mapped fields from a source object. */
    resolve(source) {
        const result = {};
        for (const [field, path] of Object.entries(this.paths)) {
            result[field] = resolvePath(source, path);
        }
        return result;
    }
    /**
     * Resolve fields in schema order, returning a positional array.
     * Fields that are unmapped or resolve to null/undefined are emitted as null.
     */
    resolveToRow(source, fields) {
        const resolved = this.resolve(source);
        return fields.map((f) => 
        // Own-property check: an unmapped field must not pick up an inherited
        // member of the plain result object.
        Object.prototype.hasOwnProperty.call(resolved, f) ? (resolved[f] ?? null) : null);
    }
    /** Return a new FieldMapping with some paths overridden. */
    withOverrides(overrides) {
        return new FieldMapping({
            schemaId: this.schemaId,
            paths: { ...this.paths, ...overrides },
        });
    }
    /**
     * Auto-bind schema fields to source paths by name matching.
     *
     * Precedence per field: explicit override, then a top-level key of `sample`
     * holding a non-object value (or null), then a nested path whose last segment
     * equals the field name. Fields with no match are left unmapped.
     */
    static autoBind(schemaId, fields, sample, overrides) {
        const ov = overrides ?? {};
        const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
        // Loop-invariant: the flattened key list does not depend on the field.
        const flatPaths = Object.keys(flattenKeys(sample));
        const paths = {};
        for (const fieldName of fields) {
            if (hasOwn(ov, fieldName)) {
                paths[fieldName] = ov[fieldName];
                continue;
            }
            // Top-level exact match
            if (hasOwn(sample, fieldName) &&
                (typeof sample[fieldName] !== "object" || sample[fieldName] === null)) {
                paths[fieldName] = fieldName;
                continue;
            }
            // Nested leaf match
            const candidates = flatPaths.filter((p) => p.split(".").pop() === fieldName);
            if (candidates.length === 1) {
                paths[fieldName] = candidates[0];
            }
            else if (candidates.length > 1) {
                // Ambiguous: keep the shortest path string (first one wins on ties).
                paths[fieldName] = candidates.reduce((a, b) => a.length <= b.length ? a : b);
            }
        }
        return new FieldMapping({ schemaId, paths });
    }
    /** Return a plain definition: schemaId plus a shallow copy of paths. */
    toDef() {
        return { schemaId: this.schemaId, paths: { ...this.paths } };
    }
}
|
|
90
|
+
//# sourceMappingURL=mapping.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mapping.js","sourceRoot":"","sources":["../src/mapping.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,GAA4B,EAC5B,MAAM,GAAG,EAAE,EACX,QAAQ,GAAG,QAAQ;IAEnB,MAAM,MAAM,GAA4B,EAAE,CAAC;IAC3C,MAAM,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3D,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9C,IACE,CAAC,KAAK,IAAI;YACV,OAAO,CAAC,KAAK,QAAQ;YACrB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;YACjB,YAAY,GAAG,QAAQ,EACvB,CAAC;YACD,MAAM,CAAC,MAAM,CACX,MAAM,EACN,WAAW,CAAC,CAA4B,EAAE,OAAO,EAAE,QAAQ,CAAC,CAC7D,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,mFAAmF;AACnF,MAAM,UAAU,WAAW,CAAC,GAAY,EAAE,IAAY;IACpD,IAAI,OAAO,GAAG,GAAG,CAAC;IAClB,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;QACtC,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,SAAS,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;YAC7E,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,OAAO,GAAI,OAAmC,CAAC,OAAO,CAAC,CAAC;IAC1D,CAAC;IACD,OAAO,OAAO,IAAI,SAAS,CAAC;AAC9B,CAAC;AAED,MAAM,OAAO,YAAY;IACd,QAAQ,CAAS;IACjB,KAAK,CAAyB;IAEvC,YAAY,GAAoB;QAC9B,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;QAC7B,IAAI,CAAC,KAAK,GAAG,EAAE,GAAG,GAAG,CAAC,KAAK,EAAE,CAAC;IAChC,CAAC;IAED,sDAAsD;IACtD,OAAO,CAAC,MAA+B;QACrC,MAAM,MAAM,GAA4B,EAAE,CAAC;QAC3C,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACvD,MAAM,CAAC,KAAK,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;QAC5C,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,oEAAoE;IACpE,YAAY,CAAC,MAA+B,EAAE,MAAgB;QAC5D,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QACtC,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC;IAChD,CAAC;IAED,4DAA4D;IAC5D,aAAa,CAAC,SAAiC;QAC7C,OAAO,IAAI,YAAY,CAAC;YACtB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,KAAK,EAAE,EAAE,GAAG,IAAI,CAAC,KAAK,EAAE,GAAG,SAAS,EAAE;S
ACvC,CAAC,CAAC;IACL,CAAC;IAED,gEAAgE;IAChE,MAAM,CAAC,QAAQ,CACb,QAAgB,EAChB,MAAgB,EAChB,MAA+B,EAC/B,SAAkC;QAElC,MAAM,EAAE,GAAG,SAAS,IAAI,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QACjC,MAAM,KAAK,GAA2B,EAAE,CAAC;QAEzC,KAAK,MAAM,SAAS,IAAI,MAAM,EAAE,CAAC;YAC/B,IAAI,SAAS,IAAI,EAAE,EAAE,CAAC;gBACpB,KAAK,CAAC,SAAS,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;gBACjC,SAAS;YACX,CAAC;YACD,wBAAwB;YACxB,IACE,SAAS,IAAI,MAAM;gBACnB,CAAC,OAAO,MAAM,CAAC,SAAS,CAAC,KAAK,QAAQ,IAAI,MAAM,CAAC,SAAS,CAAC,KAAK,IAAI,CAAC,EACrE,CAAC;gBACD,KAAK,CAAC,SAAS,CAAC,GAAG,SAAS,CAAC;gBAC7B,SAAS;YACX,CAAC;YACD,oBAAoB;YACpB,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,MAAM,CACzC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,SAAS,CACxC,CAAC;YACF,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC5B,KAAK,CAAC,SAAS,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YACnC,CAAC;iBAAM,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACjC,KAAK,CAAC,SAAS,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAC5C,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAC7B,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,IAAI,YAAY,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,KAAK;QACH,OAAO,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,KAAK,EAAE,EAAE,GAAG,IAAI,CAAC,KAAK,EAAE,EAAE,CAAC;IAC/D,CAAC;CACF"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* PicoClaw DCP Hook — out-of-process JSON-RPC over stdio.
|
|
4
|
+
*
|
|
5
|
+
* Two-hook pattern:
|
|
6
|
+
* before_tool — injects queryType=agent for MCP tools with DCP output mode
|
|
7
|
+
* after_tool — DCP-encodes JSON tool results (fallback for tools without native DCP)
|
|
8
|
+
*
|
|
9
|
+
* Config via PICOCLAW_DCP_TOOLS env (JSON):
|
|
10
|
+
* {
|
|
11
|
+
* "mcp_engram_engram_pull": { "id": "engram-recall:v1", "fields": [...] },
|
|
12
|
+
* "web_fetch": "auto"
|
|
13
|
+
* }
|
|
14
|
+
*
|
|
15
|
+
* Config via PICOCLAW_DCP_AGENT_TOOLS env (comma-separated):
|
|
16
|
+
* Tools where before_tool injects queryType=agent.
|
|
17
|
+
* Example: "mcp_engram_engram_pull,mcp_engram_engram_ls"
|
|
18
|
+
*/
|
|
19
|
+
export {};
|