monora-ai 2.0.0 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +441 -150
- package/dist/aims_governance.d.ts +238 -0
- package/dist/aims_governance.d.ts.map +1 -0
- package/dist/aims_governance.js +922 -0
- package/dist/alerts.d.ts +16 -0
- package/dist/alerts.d.ts.map +1 -1
- package/dist/alerts.js +16 -0
- package/dist/api.d.ts +6 -0
- package/dist/api.d.ts.map +1 -1
- package/dist/api.js +6 -0
- package/dist/assessment.d.ts +269 -0
- package/dist/assessment.d.ts.map +1 -0
- package/dist/assessment.js +1232 -0
- package/dist/attestation.js +23 -1
- package/dist/attribution.d.ts +349 -0
- package/dist/attribution.d.ts.map +1 -0
- package/dist/attribution.js +987 -0
- package/dist/autodetect.d.ts +69 -1
- package/dist/autodetect.d.ts.map +1 -1
- package/dist/autodetect.js +644 -1
- package/dist/bias.d.ts +130 -0
- package/dist/bias.d.ts.map +1 -0
- package/dist/bias.js +223 -0
- package/dist/circuit_breaker.js +3 -3
- package/dist/cli/diagnostics.d.ts +5 -1
- package/dist/cli/diagnostics.d.ts.map +1 -1
- package/dist/cli/diagnostics.js +31 -8
- package/dist/cli/doctor.d.ts +25 -0
- package/dist/cli/doctor.d.ts.map +1 -0
- package/dist/cli/doctor.js +381 -0
- package/dist/cli/fix.d.ts +16 -0
- package/dist/cli/fix.d.ts.map +1 -0
- package/dist/cli/fix.js +284 -0
- package/dist/cli/init.d.ts +57 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +205 -0
- package/dist/cli.js +1611 -126
- package/dist/complianceTargets.d.ts +111 -0
- package/dist/complianceTargets.d.ts.map +1 -0
- package/dist/complianceTargets.js +521 -0
- package/dist/config.d.ts +301 -17
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +428 -36
- package/dist/config_migrations.d.ts +41 -0
- package/dist/config_migrations.d.ts.map +1 -1
- package/dist/config_migrations.js +205 -0
- package/dist/config_schema.d.ts +2900 -731
- package/dist/config_schema.d.ts.map +1 -1
- package/dist/config_schema.js +257 -55
- package/dist/context.d.ts +34 -0
- package/dist/context.d.ts.map +1 -1
- package/dist/context.js +118 -7
- package/dist/control_backbone.d.ts +122 -0
- package/dist/control_backbone.d.ts.map +1 -0
- package/dist/control_backbone.js +698 -0
- package/dist/data-governance.d.ts +187 -0
- package/dist/data-governance.d.ts.map +1 -0
- package/dist/data-governance.js +424 -0
- package/dist/dataResidency.d.ts +44 -0
- package/dist/dataResidency.d.ts.map +1 -0
- package/dist/dataResidency.js +203 -0
- package/dist/dispatcher.d.ts +32 -0
- package/dist/dispatcher.d.ts.map +1 -1
- package/dist/dispatcher.js +91 -4
- package/dist/events.d.ts.map +1 -1
- package/dist/events.js +38 -0
- package/dist/evidence_store.d.ts +103 -0
- package/dist/evidence_store.d.ts.map +1 -0
- package/dist/evidence_store.js +459 -0
- package/dist/executiveSummary.d.ts +65 -8
- package/dist/executiveSummary.d.ts.map +1 -1
- package/dist/executiveSummary.js +289 -26
- package/dist/identity.d.ts +143 -0
- package/dist/identity.d.ts.map +1 -0
- package/dist/identity.js +231 -0
- package/dist/impact-assessment.d.ts +350 -0
- package/dist/impact-assessment.d.ts.map +1 -0
- package/dist/impact-assessment.js +580 -0
- package/dist/index.d.ts +25 -5
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +300 -4
- package/dist/instrumentation.d.ts +1 -1
- package/dist/instrumentation.d.ts.map +1 -1
- package/dist/instrumentation.js +243 -27
- package/dist/integrations/anthropic.d.ts +3 -0
- package/dist/integrations/anthropic.d.ts.map +1 -1
- package/dist/integrations/anthropic.js +284 -79
- package/dist/integrations/governance.d.ts +33 -0
- package/dist/integrations/governance.d.ts.map +1 -0
- package/dist/integrations/governance.js +208 -0
- package/dist/integrations/langchain.d.ts +7 -0
- package/dist/integrations/langchain.d.ts.map +1 -1
- package/dist/integrations/langchain.js +387 -143
- package/dist/integrations/openai.d.ts +9 -0
- package/dist/integrations/openai.d.ts.map +1 -1
- package/dist/integrations/openai.js +673 -73
- package/dist/iso42001_consolidation.d.ts +16 -0
- package/dist/iso42001_consolidation.d.ts.map +1 -0
- package/dist/iso42001_consolidation.js +413 -0
- package/dist/iso42001_workflows.d.ts +263 -0
- package/dist/iso42001_workflows.d.ts.map +1 -0
- package/dist/iso42001_workflows.js +781 -0
- package/dist/lifecycle.d.ts +299 -0
- package/dist/lifecycle.d.ts.map +1 -0
- package/dist/lifecycle.js +624 -0
- package/dist/lineage.d.ts +2 -2
- package/dist/lineage.d.ts.map +1 -1
- package/dist/lineage.js +12 -17
- package/dist/middleware/express.d.ts.map +1 -1
- package/dist/middleware/express.js +33 -3
- package/dist/middleware/nextjs.d.ts.map +1 -1
- package/dist/middleware/nextjs.js +42 -68
- package/dist/model.d.ts +143 -0
- package/dist/model.d.ts.map +1 -0
- package/dist/model.js +371 -0
- package/dist/onboarding.d.ts +42 -0
- package/dist/onboarding.d.ts.map +1 -0
- package/dist/onboarding.js +1022 -0
- package/dist/oversight.d.ts +264 -0
- package/dist/oversight.d.ts.map +1 -0
- package/dist/oversight.js +497 -0
- package/dist/pdf_report.d.ts.map +1 -1
- package/dist/pdf_report.js +42 -21
- package/dist/presets.d.ts +88 -0
- package/dist/presets.d.ts.map +1 -0
- package/dist/presets.js +520 -0
- package/dist/propagation.d.ts.map +1 -1
- package/dist/propagation.js +34 -2
- package/dist/quotas.d.ts +171 -0
- package/dist/quotas.d.ts.map +1 -0
- package/dist/quotas.js +259 -0
- package/dist/register.d.ts +13 -0
- package/dist/register.d.ts.map +1 -0
- package/dist/register.js +99 -0
- package/dist/registry.d.ts +1 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +7 -0
- package/dist/registryData.json +43 -6
- package/dist/report.d.ts +2 -1
- package/dist/report.d.ts.map +1 -1
- package/dist/report.js +189 -2
- package/dist/reporting.d.ts +125 -0
- package/dist/reporting.d.ts.map +1 -1
- package/dist/reporting.js +196 -5
- package/dist/resources.d.ts +285 -0
- package/dist/resources.d.ts.map +1 -0
- package/dist/resources.js +643 -0
- package/dist/risk.d.ts +120 -0
- package/dist/risk.d.ts.map +1 -0
- package/dist/risk.js +220 -0
- package/dist/runtime.d.ts +74 -1
- package/dist/runtime.d.ts.map +1 -1
- package/dist/runtime.js +598 -22
- package/dist/schemaInference.d.ts +92 -0
- package/dist/schemaInference.d.ts.map +1 -0
- package/dist/schemaInference.js +466 -0
- package/dist/schema_validation.js +2 -2
- package/dist/schemas/config.schema.json +169 -6
- package/dist/schemas/event.schema.json +4 -0
- package/dist/security_report.js +4 -4
- package/dist/signing.d.ts +1 -1
- package/dist/signing.d.ts.map +1 -1
- package/dist/signing.js +4 -0
- package/dist/sinks/file.d.ts +19 -1
- package/dist/sinks/file.d.ts.map +1 -1
- package/dist/sinks/file.js +82 -13
- package/dist/sinks/https.d.ts +10 -0
- package/dist/sinks/https.d.ts.map +1 -1
- package/dist/sinks/https.js +76 -16
- package/dist/sinks/stdout.d.ts +1 -0
- package/dist/sinks/stdout.d.ts.map +1 -1
- package/dist/sinks/stdout.js +12 -1
- package/dist/spec.d.ts +159 -0
- package/dist/spec.d.ts.map +1 -0
- package/dist/spec.js +391 -0
- package/dist/stakeholders.d.ts +199 -0
- package/dist/stakeholders.d.ts.map +1 -0
- package/dist/stakeholders.js +398 -0
- package/dist/standards.d.ts.map +1 -1
- package/dist/standards.js +160 -2
- package/dist/standards_ingest.d.ts +2 -2
- package/dist/standards_ingest.d.ts.map +1 -1
- package/dist/standards_ingest.js +105 -23
- package/dist/streaming.d.ts.map +1 -1
- package/dist/streaming.js +7 -2
- package/dist/telemetry.d.ts +16 -2
- package/dist/telemetry.d.ts.map +1 -1
- package/dist/telemetry.js +79 -14
- package/dist/templates/controls/iso42001_control_catalog.json +1443 -0
- package/dist/traced_emitter.d.ts +3 -0
- package/dist/traced_emitter.d.ts.map +1 -1
- package/dist/traced_emitter.js +142 -25
- package/dist/trust_package.d.ts +21 -1
- package/dist/trust_package.d.ts.map +1 -1
- package/dist/trust_package.js +101 -4
- package/dist/verify.d.ts.map +1 -1
- package/dist/verify.js +9 -2
- package/dist/wal.d.ts.map +1 -1
- package/dist/wal.js +2 -1
- package/package.json +14 -1
- package/scripts/postinstall.js +119 -97
- package/templates/controls/iso42001_control_catalog.json +1443 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Schema Inference - Automatically infer MonoraSpec from sample events.
|
|
3
|
+
*
|
|
4
|
+
* Analyzes sample LLM events and generates a MonoraSpec with field definitions,
|
|
5
|
+
* role assignments, and PII detection.
|
|
6
|
+
*/
|
|
7
|
+
import { FieldType, FieldRole, PIIType, MonoraSpec } from './spec';
|
|
8
|
+
/**
|
|
9
|
+
* Detect the most likely field type from sample values.
|
|
10
|
+
*/
|
|
11
|
+
export declare function detectFieldType(values: any[]): FieldType;
|
|
12
|
+
/**
|
|
13
|
+
* Detect if a field contains PII based on field name and content.
|
|
14
|
+
*/
|
|
15
|
+
export declare function detectPiiType(fieldName: string, values: any[], checkContent?: boolean): PIIType | undefined;
|
|
16
|
+
/**
|
|
17
|
+
* Suggest a role (input/output/metadata/identifier) for a field.
|
|
18
|
+
*/
|
|
19
|
+
export declare function suggestRole(fieldName: string, fieldType: FieldType): FieldRole;
|
|
20
|
+
/**
|
|
21
|
+
* Detect which field is most likely the event timestamp.
|
|
22
|
+
*/
|
|
23
|
+
export declare function detectTimestampField(fieldNames: string[]): string | undefined;
|
|
24
|
+
/**
|
|
25
|
+
* Flatten a nested object into dot-notation keys.
|
|
26
|
+
*/
|
|
27
|
+
export declare function flattenDict(obj: Record<string, any>, parentKey?: string, sep?: string): Record<string, any>;
|
|
28
|
+
/**
|
|
29
|
+
* Collect sample values for each field across events.
|
|
30
|
+
*/
|
|
31
|
+
export declare function collectFieldValues(events: Array<Record<string, any>>, sampleSize?: number): Map<string, any[]>;
|
|
32
|
+
/**
|
|
33
|
+
* Compute statistics for a field's values.
|
|
34
|
+
*/
|
|
35
|
+
export declare function computeFieldStats(values: any[]): Record<string, any>;
|
|
36
|
+
/** Options accepted by inferSchemaFromEvents and inferSchemaFromFile; every member is optional. */
export interface InferSchemaOptions {
|
|
37
|
+
/** Name for the model/application; handed to the generated spec as-is. */
|
|
38
|
+
modelName?: string;
|
|
39
|
+
/** Version string; inferSchemaFromEvents uses '1.0.0' when omitted. */
|
|
40
|
+
modelVersion?: string;
|
|
41
|
+
/** Compliance frameworks to target; defaults to an empty list (no framework enrichments merged). */
|
|
42
|
+
complianceTargets?: string[];
|
|
43
|
+
/** Whether to scan for PII; defaults to true. */
|
|
44
|
+
detectPii?: boolean;
|
|
45
|
+
/** Maximum samples per field for analysis; defaults to 100. */
|
|
46
|
+
sampleSize?: number;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Infer a MonoraSpec from sample events.
|
|
50
|
+
*
|
|
51
|
+
* Analyzes the structure of sample events and automatically:
|
|
52
|
+
* - Detects field types (string, number, boolean, etc.)
|
|
53
|
+
* - Assigns roles (input, output, metadata, identifier)
|
|
54
|
+
* - Detects PII fields
|
|
55
|
+
* - Identifies the timestamp field
|
|
56
|
+
* - Suggests enrichments based on compliance targets
|
|
57
|
+
*/
|
|
58
|
+
export declare function inferSchemaFromEvents(events: Array<Record<string, any>>, options?: InferSchemaOptions): MonoraSpec;
|
|
59
|
+
/**
|
|
60
|
+
* Load events from a JSONL file.
|
|
61
|
+
*/
|
|
62
|
+
export declare function loadEventsFromFile(filePath: string): Array<Record<string, any>>;
|
|
63
|
+
/**
|
|
64
|
+
* Infer a MonoraSpec from a JSONL file.
|
|
65
|
+
*/
|
|
66
|
+
export declare function inferSchemaFromFile(filePath: string, options?: InferSchemaOptions): MonoraSpec;
|
|
67
|
+
/** Per-field entry of an InferenceReport, as built by generateInferenceReport. */
export interface InferenceReportField {
|
|
68
|
+
/** Flattened field name (nested keys are joined in dot notation). */
name: string;
|
|
69
|
+
/** Type returned by detectFieldType for the collected samples. */
inferred_type: string;
|
|
70
|
+
/** Role returned by suggestRole for this field. */
suggested_role: string;
|
|
71
|
+
/** PII type returned by detectPiiType, or null when nothing was detected. */
pii_type: string | null;
|
|
72
|
+
/** Statistics returned by computeFieldStats for the collected samples. */
stats: Record<string, any>;
|
|
73
|
+
/** Up to the first five collected sample values. */
sample_values: any[];
|
|
74
|
+
}
|
|
75
|
+
/** Result of generateInferenceReport: a summary of what was inferred from the supplied events. */
export interface InferenceReport {
|
|
76
|
+
/** Length of the events array that was passed in. */
event_count: number;
|
|
77
|
+
/** Number of distinct flattened field names that were collected. */
field_count: number;
|
|
78
|
+
/** Field name chosen by detectTimestampField, or undefined when none matched. */
timestamp_field: string | undefined;
|
|
79
|
+
/** One entry per collected field. */
fields: InferenceReportField[];
|
|
80
|
+
/** How many fields were assigned to each role. */
role_summary: {
|
|
81
|
+
inputs: number;
|
|
82
|
+
outputs: number;
|
|
83
|
+
metadata: number;
|
|
84
|
+
identifiers: number;
|
|
85
|
+
};
|
|
86
|
+
/** Names of the fields for which a PII type was detected. */
pii_fields: string[];
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Generate a detailed report about inferred schema.
|
|
90
|
+
*/
|
|
91
|
+
export declare function generateInferenceReport(events: Array<Record<string, any>>, sampleSize?: number): InferenceReport;
|
|
92
|
+
//# sourceMappingURL=schemaInference.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schemaInference.d.ts","sourceRoot":"","sources":["../src/schemaInference.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,OAAO,EAEL,SAAS,EACT,SAAS,EACT,OAAO,EACP,UAAU,EAOX,MAAM,QAAQ,CAAC;AAyEhB;;GAEG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,SAAS,CA6CxD;AAED;;GAEG;AACH,wBAAgB,aAAa,CAC3B,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,GAAG,EAAE,EACb,YAAY,GAAE,OAAc,GAC3B,OAAO,GAAG,SAAS,CA6BrB;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,GAAG,SAAS,CA0B9E;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,GAAG,SAAS,CAkB7E;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,EACxB,SAAS,GAAE,MAAW,EACtB,GAAG,GAAE,MAAY,GAChB,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAerB;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,EAClC,UAAU,GAAE,MAAY,GACvB,GAAG,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,CAiBpB;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,GAAG,EAAE,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAiCpE;AAED,MAAM,WAAW,kBAAkB;IACjC,qCAAqC;IACrC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qBAAqB;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,sCAAsC;IACtC,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC7B,8BAA8B;IAC9B,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,6CAA6C;IAC7C,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;;;;;;GASG;AACH,wBAAgB,qBAAqB,CACnC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,EAClC,OAAO,GAAE,kBAAuB,GAC/B,UAAU,CAmFZ;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAoB/E;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,kBAAuB,GAC/B,UAAU,CAGZ;AAED,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IAC3B,aAAa,EAAE,GAAG,EAAE,CAAC;CACtB;AAED,MAAM,WAAW,eAAe;IAC9B,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,GAAG,SAAS,CAAC;IACpC,MAAM,EAAE,oBAAoB,EAAE,CAAC;IAC/B,YAAY,EAAE;QACZ,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CA
AC;QAChB,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;KACrB,CAAC;IACF,UAAU,EAAE,MAAM,EAAE,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,EAClC,UAAU,GAAE,MAAY,GACvB,eAAe,CAoDjB"}
|
|
@@ -0,0 +1,466 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Schema Inference - Automatically infer MonoraSpec from sample events.
|
|
4
|
+
*
|
|
5
|
+
* Analyzes sample LLM events and generates a MonoraSpec with field definitions,
|
|
6
|
+
* role assignments, and PII detection.
|
|
7
|
+
*/
|
|
8
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
9
|
+
if (k2 === undefined) k2 = k;
|
|
10
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
11
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
12
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
13
|
+
}
|
|
14
|
+
Object.defineProperty(o, k2, desc);
|
|
15
|
+
}) : (function(o, m, k, k2) {
|
|
16
|
+
if (k2 === undefined) k2 = k;
|
|
17
|
+
o[k2] = m[k];
|
|
18
|
+
}));
|
|
19
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
20
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
21
|
+
}) : function(o, v) {
|
|
22
|
+
o["default"] = v;
|
|
23
|
+
});
|
|
24
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
25
|
+
var ownKeys = function(o) {
|
|
26
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
27
|
+
var ar = [];
|
|
28
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
29
|
+
return ar;
|
|
30
|
+
};
|
|
31
|
+
return ownKeys(o);
|
|
32
|
+
};
|
|
33
|
+
return function (mod) {
|
|
34
|
+
if (mod && mod.__esModule) return mod;
|
|
35
|
+
var result = {};
|
|
36
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
37
|
+
__setModuleDefault(result, mod);
|
|
38
|
+
return result;
|
|
39
|
+
};
|
|
40
|
+
})();
|
|
41
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
|
+
exports.detectFieldType = detectFieldType;
|
|
43
|
+
exports.detectPiiType = detectPiiType;
|
|
44
|
+
exports.suggestRole = suggestRole;
|
|
45
|
+
exports.detectTimestampField = detectTimestampField;
|
|
46
|
+
exports.flattenDict = flattenDict;
|
|
47
|
+
exports.collectFieldValues = collectFieldValues;
|
|
48
|
+
exports.computeFieldStats = computeFieldStats;
|
|
49
|
+
exports.inferSchemaFromEvents = inferSchemaFromEvents;
|
|
50
|
+
exports.loadEventsFromFile = loadEventsFromFile;
|
|
51
|
+
exports.inferSchemaFromFile = inferSchemaFromFile;
|
|
52
|
+
exports.generateInferenceReport = generateInferenceReport;
|
|
53
|
+
const fs = __importStar(require("fs"));
|
|
54
|
+
const path = __importStar(require("path"));
|
|
55
|
+
const spec_1 = require("./spec");
|
|
56
|
+
/** Field-name patterns that classify a field as model input. */
const INPUT_FIELD_PATTERNS = [
    /.*prompt.*/i, /.*query.*/i, /.*question.*/i, /.*input.*/i, /.*message.*/i,
    /.*request.*/i, /.*user_input.*/i, /.*context.*/i, /.*instruction.*/i, /.*system.*/i,
];
/** Field-name patterns that classify a field as model output. */
const OUTPUT_FIELD_PATTERNS = [
    /.*response.*/i, /.*answer.*/i, /.*output.*/i, /.*completion.*/i, /.*result.*/i,
    /.*reply.*/i, /.*generated.*/i, /.*content.*/i, /.*text.*/i,
];
/** Field-name patterns that classify a field as an identifier. */
const IDENTIFIER_FIELD_PATTERNS = [
    /.*_id$/i, /^id$/i, /.*event_id.*/i, /.*trace_id.*/i, /.*span_id.*/i,
    /.*request_id.*/i, /.*session_id.*/i, /.*correlation_id.*/i, /.*uuid.*/i,
];
/** Field-name patterns for timestamp candidates, listed from first-tried to last-tried. */
const TIMESTAMP_FIELD_PATTERNS = [
    /.*timestamp.*/i, /.*time.*/i, /.*created_at.*/i, /.*updated_at.*/i, /.*date.*/i, /.*_at$/i,
];
/** Content patterns keyed by PII type; iteration order is the declaration order below. */
const PII_PATTERNS = new Map(Object.entries({
    email: /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/,
    phone: /\+?[\d\s\-\(\)]{10,}/,
    ssn: /\d{3}-?\d{2}-?\d{4}/,
    credit_card: /\d{4}[\s\-]?\d{4}[\s\-]?\d{4}[\s\-]?\d{4}/,
    ip_address: /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/,
}));
/** Lower-case field-name fragments keyed by the PII type they hint at. */
const PII_FIELD_NAME_HINTS = new Map(Object.entries({
    email: ['email', 'e_mail', 'mail', 'email_address'],
    phone: ['phone', 'telephone', 'mobile', 'cell', 'phone_number'],
    ssn: ['ssn', 'social_security', 'tax_id'],
    credit_card: ['card', 'credit_card', 'cc_number', 'card_number'],
    name: ['name', 'first_name', 'last_name', 'full_name', 'user_name', 'username'],
    address: ['address', 'street', 'city', 'zip', 'postal', 'location'],
    ip_address: ['ip', 'ip_address', 'client_ip', 'remote_ip'],
    date_of_birth: ['dob', 'date_of_birth', 'birth_date', 'birthday'],
}));
|
|
121
|
+
/**
 * Detect the most likely field type from sample values.
 *
 * Null and undefined samples are ignored. Each remaining sample is classified
 * as 'boolean', 'integer', 'number', 'string', 'array' or 'object', and the
 * most frequent class is returned (the class seen first wins a tie).
 *
 * Whole numbers and fractional numbers are not treated as competing types:
 * when a field contains both, every numeric sample counts towards 'number',
 * so a field like [12, 15, 13.5] is typed 'number' rather than 'integer'.
 *
 * @param {any[]} values - Sample values observed for one field.
 * @returns {string} The inferred type; 'string' when there is nothing to go on.
 */
function detectFieldType(values) {
    if (!values || values.length === 0) {
        return 'string';
    }
    const typeCounts = new Map();
    for (const value of values) {
        if (value === null || value === undefined) {
            continue;
        }
        let fieldType;
        if (typeof value === 'boolean') {
            fieldType = 'boolean';
        }
        else if (Number.isInteger(value)) {
            fieldType = 'integer';
        }
        else if (typeof value === 'number') {
            fieldType = 'number';
        }
        else if (typeof value === 'string') {
            fieldType = 'string';
        }
        else if (Array.isArray(value)) {
            fieldType = 'array';
        }
        else if (typeof value === 'object') {
            fieldType = 'object';
        }
        else {
            // bigint, symbol, function: no dedicated type, treat as string.
            fieldType = 'string';
        }
        typeCounts.set(fieldType, (typeCounts.get(fieldType) || 0) + 1);
    }
    // Every integer is also a valid number, so a field that mixes the two is a
    // 'number' field; fold the integer votes into 'number' before picking a winner.
    if (typeCounts.has('integer') && typeCounts.has('number')) {
        typeCounts.set('number', typeCounts.get('number') + typeCounts.get('integer'));
        typeCounts.delete('integer');
    }
    // Return the most common type ('string' when every sample was null/undefined).
    let maxType = 'string';
    let maxCount = 0;
    for (const [type, count] of typeCounts) {
        if (count > maxCount) {
            maxCount = count;
            maxType = type;
        }
    }
    return maxType;
}
|
|
169
|
+
/**
 * Detect if a field contains PII based on field name and content.
 *
 * The lower-cased field name is compared against PII_FIELD_NAME_HINTS first.
 * When several hints occur in the name, the longest (most specific) one
 * decides the type, so 'ip_address' is reported as 'ip_address' rather than
 * 'address', and 'mailing_address' as 'address' rather than 'email'. Hints of
 * equal length keep the declaration order of the hint table.
 *
 * If the name gives no answer and checkContent is true, the first ten
 * non-null values are stringified and tested against PII_PATTERNS in
 * declaration order; the first pattern that matches any value wins.
 *
 * @param {string} fieldName - Field name (may be a dot-notation path).
 * @param {any[]} values - Sample values for the field.
 * @param {boolean} [checkContent=true] - Whether to inspect the values.
 * @returns {string|undefined} The detected PII type, or undefined.
 */
function detectPiiType(fieldName, values, checkContent = true) {
    const fieldNameLower = fieldName.toLowerCase();
    // Name hints first (cheaper than scanning content). Track the longest
    // matching hint instead of returning on the first substring hit.
    let bestType;
    let bestHintLength = 0;
    for (const [piiType, hints] of PII_FIELD_NAME_HINTS) {
        for (const hint of hints) {
            if (hint.length > bestHintLength && fieldNameLower.includes(hint)) {
                bestType = piiType;
                bestHintLength = hint.length;
            }
        }
    }
    if (bestType !== undefined) {
        return bestType;
    }
    if (!checkContent) {
        return undefined;
    }
    // Only the first 10 non-null values are inspected.
    const stringValues = values
        .filter((v) => v !== null && v !== undefined)
        .slice(0, 10)
        .map((v) => String(v));
    for (const [piiType, pattern] of PII_PATTERNS) {
        if (stringValues.some((value) => pattern.test(value))) {
            return piiType;
        }
    }
    return undefined;
}
|
|
198
|
+
/**
 * Suggest a role (input/output/metadata/identifier) for a field.
 *
 * The lower-cased name is tried against the identifier patterns, then the
 * input patterns, then the output patterns; the first group with a match
 * decides. Fields matching nothing are 'metadata'.
 *
 * @param {string} fieldName - Field name to classify.
 * @param {string} fieldType - Inferred field type (not consulted here).
 * @returns {string} 'identifier', 'input', 'output' or 'metadata'.
 */
function suggestRole(fieldName, fieldType) {
    const lowered = fieldName.toLowerCase();
    const matchesAny = (patterns) => patterns.some((candidate) => candidate.test(lowered));
    // Identifier patterns take priority over input/output patterns.
    if (matchesAny(IDENTIFIER_FIELD_PATTERNS)) {
        return 'identifier';
    }
    if (matchesAny(INPUT_FIELD_PATTERNS)) {
        return 'input';
    }
    if (matchesAny(OUTPUT_FIELD_PATTERNS)) {
        return 'output';
    }
    return 'metadata';
}
|
|
224
|
+
/**
 * Detect which field is most likely the event timestamp.
 *
 * Patterns in TIMESTAMP_FIELD_PATTERNS are tried one after another; for each
 * pattern the first field name (in the given order) that matches wins. If no
 * pattern matches, a short list of exact names is checked as a fallback.
 *
 * @param {string[]} fieldNames - Candidate field names.
 * @returns {string|undefined} The chosen name in its original casing, or undefined.
 */
function detectTimestampField(fieldNames) {
    // First field whose lower-cased name satisfies the predicate.
    const firstNameWhere = (predicate) => {
        for (const candidate of fieldNames) {
            if (predicate(candidate.toLowerCase())) {
                return candidate;
            }
        }
        return undefined;
    };
    for (const pattern of TIMESTAMP_FIELD_PATTERNS) {
        const hit = firstNameWhere((lowered) => pattern.test(lowered));
        if (hit !== undefined) {
            return hit;
        }
    }
    // Fallback: common exact names.
    const exactNames = ['timestamp', 'time', 'created_at', 'event_time', 'ts'];
    return firstNameWhere((lowered) => exactNames.includes(lowered));
}
|
|
244
|
+
/**
 * Flatten a nested object into dot-notation keys.
 *
 * Only plain objects (prototype is Object.prototype or null) are descended
 * into. Arrays and every other kind of object - Date, Map, Buffer, class
 * instances - are kept as leaf values. Descending into those would silently
 * drop them (a Date has no enumerable entries) or explode them into index
 * keys (a Buffer). Empty plain objects contribute no keys.
 *
 * @param {Record<string, any>} obj - Object to flatten.
 * @param {string} [parentKey=''] - Prefix prepended to every produced key.
 * @param {string} [sep='.'] - Separator placed between key segments.
 * @returns {Record<string, any>} A new single-level object.
 */
function flattenDict(obj, parentKey = '', sep = '.') {
    const isPlainObject = (value) => {
        if (value === null || typeof value !== 'object' || Array.isArray(value)) {
            return false;
        }
        const proto = Object.getPrototypeOf(value);
        return proto === Object.prototype || proto === null;
    };
    // Collect [key, value] pairs depth-first so key order follows the input.
    const items = [];
    const walk = (node, prefix) => {
        for (const [k, v] of Object.entries(node)) {
            const fullKey = prefix ? `${prefix}${sep}${k}` : k;
            if (isPlainObject(v)) {
                walk(v, fullKey);
            }
            else {
                items.push([fullKey, v]);
            }
        }
    };
    walk(obj, parentKey);
    return Object.fromEntries(items);
}
|
|
261
|
+
/**
 * Collect sample values for each field across events.
 *
 * At most `sampleSize * 10` leading events are examined. Each one is
 * flattened, and every flattened key receives the value until that key has
 * accumulated `sampleSize` samples.
 *
 * @param {Array<Record<string, any>>} events - Events to sample from.
 * @param {number} [sampleSize=100] - Maximum number of samples kept per field.
 * @returns {Map<string, any[]>} Samples keyed by flattened field name, in first-seen order.
 */
function collectFieldValues(events, sampleSize = 100) {
    const samplesByField = new Map();
    const scanLimit = sampleSize * 10;
    for (const event of events.slice(0, scanLimit)) {
        const flattened = flattenDict(event);
        for (const key of Object.keys(flattened)) {
            let bucket = samplesByField.get(key);
            const isNewField = bucket === undefined;
            if (isNewField) {
                bucket = [];
            }
            if (bucket.length >= sampleSize) {
                continue;
            }
            bucket.push(flattened[key]);
            if (isNewField) {
                samplesByField.set(key, bucket);
            }
        }
    }
    return samplesByField;
}
|
|
279
|
+
/**
 * Compute statistics for a field's values.
 *
 * Always reports total_count, non_null_count, null_count and presence_ratio
 * (non-null share of the supplied values; 0 for an empty list). The type of
 * the first non-null value selects the extra statistics:
 *   - number: min, max, avg over the numeric values
 *   - string: min_length, max_length, avg_length over the string values
 *
 * Extremes are folded in a single pass instead of `Math.min(...values)`,
 * because spreading a very large array into a call throws a RangeError.
 *
 * @param {any[]} values - Sample values for one field.
 * @returns {Record<string, any>} The statistics object.
 */
function computeFieldStats(values) {
    // min / max / mean of a non-empty list of numbers. Math.min/Math.max keep
    // NaN propagation identical to the variadic form.
    const summarize = (numbers) => {
        let min = Infinity;
        let max = -Infinity;
        let sum = 0;
        for (const n of numbers) {
            min = Math.min(min, n);
            max = Math.max(max, n);
            sum += n;
        }
        return { min, max, avg: sum / numbers.length };
    };
    const nonNull = values.filter((v) => v !== null && v !== undefined);
    const stats = {
        total_count: values.length,
        non_null_count: nonNull.length,
        null_count: values.length - nonNull.length,
        presence_ratio: values.length > 0 ? nonNull.length / values.length : 0,
    };
    if (nonNull.length === 0) {
        return stats;
    }
    // Type-specific stats, chosen by the first non-null value.
    const firstVal = nonNull[0];
    if (typeof firstVal === 'number') {
        const summary = summarize(nonNull.filter((v) => typeof v === 'number'));
        stats.min = summary.min;
        stats.max = summary.max;
        stats.avg = summary.avg;
    }
    else if (typeof firstVal === 'string') {
        const lengths = nonNull.filter((v) => typeof v === 'string').map((s) => s.length);
        const summary = summarize(lengths);
        stats.min_length = summary.min;
        stats.max_length = summary.max;
        stats.avg_length = summary.avg;
    }
    return stats;
}
|
|
313
|
+
/**
 * Infer a MonoraSpec from sample events.
 *
 * Analyzes the structure of sample events and automatically:
 * - Detects field types (string, number, boolean, etc.)
 * - Assigns roles (input, output, metadata, identifier)
 * - Detects PII fields (unless options.detectPii is false)
 * - Identifies the timestamp field (falls back to 'timestamp')
 * - Merges enrichments for every requested compliance target
 *
 * A field is marked `required` when it carries a non-null value in more than
 * 95% of the scanned events. The ratio is measured against the scanned events,
 * not against the samples collected for the field: the collected samples only
 * exist for events that contain the key, so they cannot show that a field is
 * usually absent.
 *
 * @param {Array<Record<string, any>>} events - Sample events.
 * @param {object} [options] - See InferSchemaOptions.
 * @returns {object} The spec produced by createMonoraSpec.
 * @throws {Error} When `events` is missing or empty.
 */
function inferSchemaFromEvents(events, options = {}) {
    const { modelName, modelVersion = '1.0.0', complianceTargets = [], detectPii = true, sampleSize = 100, } = options;
    if (!events || events.length === 0) {
        throw new Error('Cannot infer schema from empty events list');
    }
    const fieldValues = collectFieldValues(events, sampleSize);
    const timestampField = detectTimestampField(Array.from(fieldValues.keys()));
    // Count, per field, how many scanned events carry a non-null value.
    // The window mirrors the one collectFieldValues examines (sampleSize * 10).
    const scannedEvents = events.slice(0, sampleSize * 10);
    const presentCounts = new Map();
    for (const event of scannedEvents) {
        for (const [key, value] of Object.entries(flattenDict(event))) {
            if (value !== null && value !== undefined) {
                presentCounts.set(key, (presentCounts.get(key) || 0) + 1);
            }
        }
    }
    const inputs = [];
    const outputs = [];
    const metadata = [];
    const identifiers = [];
    for (const [fieldName, values] of fieldValues) {
        const fieldType = detectFieldType(values);
        const piiType = detectPii ? detectPiiType(fieldName, values) : undefined;
        const stats = computeFieldStats(values);
        const presence = (presentCounts.get(fieldName) || 0) / scannedEvents.length;
        const fieldSpec = (0, spec_1.createFieldSpec)(fieldName, fieldType, {
            required: presence > 0.95,
            nullable: stats.null_count > 0,
            piiType,
        });
        switch (suggestRole(fieldName, fieldType)) {
            case 'input':
                inputs.push(fieldSpec);
                break;
            case 'output':
                outputs.push(fieldSpec);
                break;
            case 'identifier':
                identifiers.push(fieldSpec);
                break;
            default:
                metadata.push(fieldSpec);
        }
    }
    // Start from the default enrichment config and merge in each framework's.
    let enrichments = (0, spec_1.createEnrichmentConfig)();
    for (const framework of complianceTargets) {
        const frameworkConfig = (0, spec_1.enrichmentConfigForFramework)(framework);
        enrichments = (0, spec_1.mergeEnrichmentConfigs)(enrichments, frameworkConfig);
    }
    return (0, spec_1.createMonoraSpec)({
        inputs,
        outputs,
        metadata,
        identifiers,
        enrichments,
        complianceTargets,
        modelName,
        modelVersion,
        eventTimestampField: timestampField || 'timestamp',
    });
}
|
|
389
|
+
/**
 * Load events from a JSONL file.
 *
 * The file is read synchronously as UTF-8 and split on newlines. Blank lines
 * and lines that are not valid JSON are skipped (best-effort loading). Lines
 * that parse to something other than a JSON object - null, numbers, strings,
 * arrays - are skipped as well: callers treat every returned element as a
 * record, and a `null` or string entry would break or corrupt flattening.
 *
 * @param {string} filePath - Path to the JSONL file (resolved against the cwd).
 * @returns {Array<Record<string, any>>} The parsed event objects, in file order.
 * @throws {Error} Whatever fs.readFileSync throws when the file cannot be read.
 */
function loadEventsFromFile(filePath) {
    const content = fs.readFileSync(path.resolve(filePath), 'utf-8');
    const events = [];
    for (const line of content.split('\n')) {
        const trimmed = line.trim();
        if (!trimmed) {
            continue;
        }
        let record;
        try {
            record = JSON.parse(trimmed);
        }
        catch {
            // Skip invalid JSON lines
            continue;
        }
        // Keep only JSON objects; scalars, null and arrays are not events.
        if (record !== null && typeof record === 'object' && !Array.isArray(record)) {
            events.push(record);
        }
    }
    return events;
}
|
|
411
|
+
/**
 * Infer a MonoraSpec from a JSONL file.
 *
 * Loads the events with loadEventsFromFile and hands them, together with the
 * options, to inferSchemaFromEvents.
 *
 * @param {string} filePath - Path to the JSONL file.
 * @param {object} [options] - Forwarded unchanged to inferSchemaFromEvents.
 * @returns {object} The inferred spec.
 */
function inferSchemaFromFile(filePath, options = {}) {
    return inferSchemaFromEvents(loadEventsFromFile(filePath), options);
}
|
|
418
|
+
/**
 * Generate a detailed report about inferred schema.
 *
 * For every collected field the report records the inferred type, suggested
 * role, detected PII type (null when none), statistics and up to five sample
 * values, plus per-role totals, the names of PII fields and the detected
 * timestamp field.
 *
 * @param {Array<Record<string, any>>} events - Events to analyze.
 * @param {number} [sampleSize=100] - Maximum samples collected per field.
 * @returns {object} The report (see InferenceReport).
 */
function generateInferenceReport(events, sampleSize = 100) {
    const samplesByField = collectFieldValues(events, sampleSize);
    // Maps a suggested role to its counter in the summary; anything else is metadata.
    const counterForRole = { input: 'inputs', output: 'outputs', identifier: 'identifiers' };
    const roleSummary = { inputs: 0, outputs: 0, metadata: 0, identifiers: 0 };
    const fields = [];
    const piiFieldNames = [];
    for (const [name, samples] of samplesByField) {
        const inferredType = detectFieldType(samples);
        const detectedPii = detectPiiType(name, samples);
        const suggestedRole = suggestRole(name, inferredType);
        const stats = computeFieldStats(samples);
        fields.push({
            name: name,
            inferred_type: inferredType,
            suggested_role: suggestedRole,
            pii_type: detectedPii || null,
            stats,
            sample_values: samples.slice(0, 5),
        });
        const counter = Object.prototype.hasOwnProperty.call(counterForRole, suggestedRole)
            ? counterForRole[suggestedRole]
            : 'metadata';
        roleSummary[counter] += 1;
        if (detectedPii) {
            piiFieldNames.push(name);
        }
    }
    return {
        event_count: events.length,
        field_count: samplesByField.size,
        timestamp_field: detectTimestampField(Array.from(samplesByField.keys())),
        fields,
        role_summary: roleSummary,
        pii_fields: piiFieldNames,
    };
}
|
|
@@ -41,8 +41,8 @@ exports.validateTrustSummarySchema = validateTrustSummarySchema;
|
|
|
41
41
|
exports.validateConfigSchema = validateConfigSchema;
|
|
42
42
|
const fs = __importStar(require("fs"));
|
|
43
43
|
const path = __importStar(require("path"));
|
|
44
|
-
const
|
|
45
|
-
const ajv = new
|
|
44
|
+
const _2020_1 = __importDefault(require("ajv/dist/2020"));
|
|
45
|
+
const ajv = new _2020_1.default({ allErrors: true, strict: false });
|
|
46
46
|
const schemaDir = path.join(__dirname, 'schemas');
|
|
47
47
|
function loadSchema(name) {
|
|
48
48
|
const schemaPath = path.join(schemaDir, name);
|