plm-shared 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plm_shared/__init__.py +11 -0
- plm_shared/_w1_13_backfill.py +159 -0
- plm_shared/agents/__init__.py +14 -0
- plm_shared/agents/base_agent.py +110 -0
- plm_shared/agents/domain_classifier.py +86 -0
- plm_shared/agents/domain_prompts.py +99 -0
- plm_shared/agents/raci_extractor.py +80 -0
- plm_shared/artifact_publisher.py +342 -0
- plm_shared/artifact_store.py +244 -0
- plm_shared/autonomy.py +188 -0
- plm_shared/autonomy_gating.yaml +73 -0
- plm_shared/capabilities.py +64 -0
- plm_shared/capabilities.yaml +29 -0
- plm_shared/capability_registry/__init__.py +49 -0
- plm_shared/capability_registry/types.py +261 -0
- plm_shared/capability_registry.yaml +355 -0
- plm_shared/cors.py +223 -0
- plm_shared/db.py +599 -0
- plm_shared/errors/__init__.py +29 -0
- plm_shared/errors/envelope.py +32 -0
- plm_shared/governance/__init__.py +142 -0
- plm_shared/governance/bus.py +155 -0
- plm_shared/governance/envelope.py +168 -0
- plm_shared/governance/events.py +300 -0
- plm_shared/governance/hitl.py +198 -0
- plm_shared/governance/invariants.py +224 -0
- plm_shared/governance/middleware.py +249 -0
- plm_shared/governance/signed_conformance.py +187 -0
- plm_shared/idempotency.py +67 -0
- plm_shared/identity.py +313 -0
- plm_shared/invocation_kinds.py +64 -0
- plm_shared/kernel_api.py +169 -0
- plm_shared/knowledge/__init__.py +66 -0
- plm_shared/knowledge/primitives_v1.py +287 -0
- plm_shared/knowledge_envelope.py +45 -0
- plm_shared/llm_errors.py +329 -0
- plm_shared/mcp/__init__.py +13 -0
- plm_shared/mcp/federation/__init__.py +44 -0
- plm_shared/mcp/federation/builder.py +416 -0
- plm_shared/mcp/federation/manifest_v1.py +134 -0
- plm_shared/middleware.py +201 -0
- plm_shared/models/__init__.py +24 -0
- plm_shared/models/cleansing.py +284 -0
- plm_shared/models/db.py +292 -0
- plm_shared/models/document_schemas.py +94 -0
- plm_shared/models/issue_schemas.py +104 -0
- plm_shared/models/parse_schemas.py +76 -0
- plm_shared/parsers/__init__.py +15 -0
- plm_shared/parsers/bpmn_text.py +152 -0
- plm_shared/plm_schemas.py +243 -0
- plm_shared/pricing.py +196 -0
- plm_shared/protocols/__init__.py +52 -0
- plm_shared/protocols/_registry.py +130 -0
- plm_shared/protocols/services/__init__.py +8 -0
- plm_shared/protocols/services/agent.py +55 -0
- plm_shared/protocols/services/database.py +83 -0
- plm_shared/protocols/services/feature_flags.py +47 -0
- plm_shared/protocols/services/llm.py +142 -0
- plm_shared/protocols/services/plan_analysis.py +69 -0
- plm_shared/protocols/services/rag.py +67 -0
- plm_shared/protocols/services/retry.py +109 -0
- plm_shared/protocols/services/settings.py +69 -0
- plm_shared/protocols/services/telemetry.py +50 -0
- plm_shared/skill_auth.py +161 -0
- plm_shared/skill_auth.yaml +97 -0
- plm_shared/sqlalchemy_kernel_emitter.py +522 -0
- plm_shared/system_context.py +59 -0
- plm_shared/telemetry/__init__.py +16 -0
- plm_shared/trace/__init__.py +55 -0
- plm_shared/trace/conformance_gate.py +145 -0
- plm_shared/trace/envelope.py +130 -0
- plm_shared/trace/manifest.py +75 -0
- plm_shared/trace/promotion_rule.py +92 -0
- plm_shared/trace/quality_levels.py +85 -0
- plm_shared/trace_context.py +205 -0
- plm_shared/utils/__init__.py +7 -0
- plm_shared/utils/plm_tools.py +531 -0
- plm_shared-0.1.0.dist-info/METADATA +264 -0
- plm_shared-0.1.0.dist-info/RECORD +81 -0
- plm_shared-0.1.0.dist-info/WHEEL +5 -0
- plm_shared-0.1.0.dist-info/top_level.txt +1 -0
plm_shared/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""plm-shared — frozen contracts shared across TracePulse PLM product lines.
|
|
2
|
+
|
|
3
|
+
US-W1.0 (PR-1): the modules in this package publish the Kernel emission
|
|
4
|
+
API surface, idempotency key, capability registry, knowledge envelope,
|
|
5
|
+
trace context, pricing helpers, and artifact-store protocol consumed by
|
|
6
|
+
the rest of Wave 1. Models in `kernel_api`, `idempotency`,
|
|
7
|
+
`knowledge_envelope`, and `capabilities` are frozen — never modified by
|
|
8
|
+
downstream stories — so the rest of the wave can be implemented in
|
|
9
|
+
parallel without contract drift.
|
|
10
|
+
"""
|
|
11
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""US-W1.13 / Conv I — idempotent backfill of Wave 0 metadata to typed columns.
|
|
2
|
+
|
|
3
|
+
Private module implementing the actual UPDATE statements + summary
|
|
4
|
+
shape used by the W1.13 backfill. Surfaced at the CLI level by
|
|
5
|
+
``02_App/plm-shared/scripts/backfill_w1_13_typed_columns.py``; the
|
|
6
|
+
audit test (`tests/audit/test_w1_13_backfill.py`) imports
|
|
7
|
+
``run_backfill`` from this module directly.
|
|
8
|
+
|
|
9
|
+
Walks every `telemetry_events` row whose typed column is NULL,
|
|
10
|
+
reads the equivalent value from `payload` JSONB, writes it into
|
|
11
|
+
the new column. Re-running on a fully-backfilled DB yields zero
|
|
12
|
+
further updates (W1.13 §AC-7 — idempotency).
|
|
13
|
+
|
|
14
|
+
Runs as `plm_migrator` per migration 0008 + Decision #16:
|
|
15
|
+
`plm_kernel_writer` is REVOKED UPDATE on `telemetry_events` per
|
|
16
|
+
the append-only rule, and a backfill IS an UPDATE. The script
|
|
17
|
+
issues `SET LOCAL ROLE plm_migrator` inside each transaction so
|
|
18
|
+
even a superuser-mode invocation stays faithful to the production
|
|
19
|
+
privilege contract.
|
|
20
|
+
|
|
21
|
+
llm_calls.pricing_source backfill is NOT covered here — the column
|
|
22
|
+
lives in PG, but pre-0016 telemetry-side rows that carried
|
|
23
|
+
`pricing_source` rode in `telemetry_events.payload`, not
|
|
24
|
+
`llm_calls.payload` (`llm_calls` has no payload JSONB column).
|
|
25
|
+
The legacy backend's SQLite `agent_invocations.metadata.pricing_source`
|
|
26
|
+
column is handled by `02_App/backend/database.py`'s startup
|
|
27
|
+
ALTER-TABLE pattern, not by this PG backfill.
|
|
28
|
+
"""
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import os
|
|
32
|
+
from dataclasses import dataclass
|
|
33
|
+
from typing import Optional
|
|
34
|
+
|
|
35
|
+
from sqlalchemy import create_engine, text
|
|
36
|
+
from sqlalchemy.engine import Engine
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass(frozen=True)
class BackfillSummary:
    """Immutable tally of rows touched, per typed column, by one backfill run.

    Running the backfill again once every row is populated is expected to
    produce a summary made of zeros only (the AC-7 idempotency contract).
    """

    updated_invocation_kind: int
    updated_is_eval: int
    updated_parent_invocation_id: int
    updated_root_invocation_id: int
    updated_system_context: int

    @property
    def total(self) -> int:
        """Return the sum of the five per-column update counts."""
        per_column = (
            self.updated_invocation_kind,
            self.updated_is_eval,
            self.updated_parent_invocation_id,
            self.updated_root_invocation_id,
            self.updated_system_context,
        )
        return sum(per_column)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Payload values eligible for the `invocation_kind` backfill; bound to the
# `:valid_kinds` parameter by `run_backfill`.
# NOTE(review): presumably mirrors the labels of the `invocation_kind_v1`
# enum type — confirm against the migration that creates it.
_INVOCATION_KIND_VALUES = ("user", "system", "hidden_retry", "background")


# Copies payload->>'invocation_kind' into the typed column for rows whose
# column is still NULL. The `= ANY(:valid_kinds)` filter runs before the cast
# is applied to a row, so values outside the bound list are simply skipped.
_BACKFILL_INVOCATION_KIND = text("""
    UPDATE telemetry_events
    SET invocation_kind = (payload->>'invocation_kind')::invocation_kind_v1
    WHERE invocation_kind IS NULL
      AND payload ? 'invocation_kind'
      AND payload->>'invocation_kind' = ANY(:valid_kinds)
""")

# `is_eval` is non-nullable with default `false`. The migration
# already populated every legacy row to `false` via the server
# default; the only useful UPDATE is for rows whose payload
# explicitly carries `is_eval=true`. Idempotent: re-run on a
# flipped row no-ops because the WHERE filters it out.
_BACKFILL_IS_EVAL = text("""
    UPDATE telemetry_events
    SET is_eval = true
    WHERE is_eval = false
      AND payload ? 'is_eval'
      AND lower(payload->>'is_eval') IN ('true', '1')
""")

# Copies payload->>'parent_invocation_id' into the typed uuid column for rows
# whose column is still NULL. The case-insensitive regex (`~*`) only admits
# canonical 8-4-4-4-12 hex strings, so the `::uuid` cast is never attempted on
# text that does not look like a UUID.
_BACKFILL_PARENT_INVOCATION_ID = text("""
    UPDATE telemetry_events
    SET parent_invocation_id = (payload->>'parent_invocation_id')::uuid
    WHERE parent_invocation_id IS NULL
      AND payload ? 'parent_invocation_id'
      AND payload->>'parent_invocation_id' ~* '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'
""")

# Same shape as the parent-id statement, applied to `root_invocation_id`.
_BACKFILL_ROOT_INVOCATION_ID = text("""
    UPDATE telemetry_events
    SET root_invocation_id = (payload->>'root_invocation_id')::uuid
    WHERE root_invocation_id IS NULL
      AND payload ? 'root_invocation_id'
      AND payload->>'root_invocation_id' ~* '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'
""")
|
|
103
|
+
|
|
104
|
+
# Copies payload->>'system_context' into the typed column for rows whose
# column is still NULL.
#
# `payload ? 'system_context'` is also true when the key holds a JSON null,
# and `->>` turns a JSON null into SQL NULL. Without the IS NOT NULL guard
# such rows would be rewritten NULL -> NULL (and counted in rowcount) on every
# run, so a re-run against a fully backfilled table would never report zero
# updates and would keep issuing UPDATEs against those rows.
_BACKFILL_SYSTEM_CONTEXT = text("""
    UPDATE telemetry_events
    SET system_context = payload->>'system_context'
    WHERE system_context IS NULL
      AND payload ? 'system_context'
      AND payload->>'system_context' IS NOT NULL
""")
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def run_backfill(engine: Engine) -> BackfillSummary:
    """Run each per-column backfill UPDATE and report how many rows it touched.

    Every statement gets a transaction of its own, so a failure on a later
    column leaves the work already committed for earlier columns in place.
    Each transaction starts with `SET LOCAL ROLE plm_migrator`, which means
    the UPDATE is executed under that role even if the connecting user is
    the dev superuser. The statements only target rows whose typed column
    has not been populated yet, which is what makes a re-run a no-op.

    Args:
        engine: SQLAlchemy engine bound to the target database.

    Returns:
        A `BackfillSummary` holding one rowcount per backfilled column.
    """
    # (column label, UPDATE statement, bind parameters) in execution order.
    plan = (
        (
            "invocation_kind",
            _BACKFILL_INVOCATION_KIND,
            {"valid_kinds": list(_INVOCATION_KIND_VALUES)},
        ),
        ("is_eval", _BACKFILL_IS_EVAL, {}),
        ("parent_invocation_id", _BACKFILL_PARENT_INVOCATION_ID, {}),
        ("root_invocation_id", _BACKFILL_ROOT_INVOCATION_ID, {}),
        ("system_context", _BACKFILL_SYSTEM_CONTEXT, {}),
    )

    updated = {}
    for column, update_sql, bind_params in plan:
        with engine.begin() as conn:
            conn.execute(text("SET LOCAL ROLE plm_migrator"))
            outcome = conn.execute(update_sql, bind_params)
            # A falsy rowcount (0 or None) is reported as 0.
            updated[column] = outcome.rowcount or 0

    return BackfillSummary(
        **{f"updated_{column}": rows for column, rows in updated.items()}
    )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def build_engine_from_env(dsn: Optional[str] = None) -> Engine:
    """Create an engine from `dsn`, falling back to the `PG_DSN` env variable.

    Args:
        dsn: Explicit connection URL. When falsy, `PG_DSN` is read from the
            environment instead.

    Returns:
        The engine produced by `create_engine(url, future=True)`.

    Raises:
        RuntimeError: if the selected value is empty once surrounding
            whitespace has been stripped.
    """
    candidate = dsn if dsn else os.environ.get("PG_DSN", "")
    url = candidate.strip()
    if url:
        return create_engine(url, future=True)
    raise RuntimeError(
        "PG_DSN is not set. Export it before running the W1.13 "
        "backfill (e.g. "
        "`postgresql+psycopg://tracepulse:tracepulse@localhost:5432/tracepulse_dev`)."
    )
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Lifted ``agents/`` modules (Wave 6.7 Conv A).
|
|
2
|
+
|
|
3
|
+
Per Decision #177 — these are not Protocol indirections; they are the
|
|
4
|
+
**canonical** homes of ``BaseAgent`` (abstract base class for the multi-
|
|
5
|
+
agent PDF→BPMN chain), the domain classifier / RACI extractor, and the
|
|
6
|
+
domain-specific prompt prefixes. Lifted from
|
|
7
|
+
``02_App/backend/agents/`` via ``git mv`` so sibling packages
|
|
8
|
+
(:mod:`plm_skill_packages`) can import them without depending on
|
|
9
|
+
``02_App/backend/``.
|
|
10
|
+
|
|
11
|
+
The legacy paths under ``02_App/backend/agents/`` remain as thin
|
|
12
|
+
re-export shims for the duration of the DC-1 90-day soak, matching the
|
|
13
|
+
W1.4 RunTaskTracker + Wave 6.5 Conv A/B precedent.
|
|
14
|
+
"""
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Base Agent Abstract Class
|
|
3
|
+
Defines the interface for all agents in the system.
|
|
4
|
+
All agents must inherit from this class and implement execute() and validate_input().
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BaseAgent(ABC):
    """
    Common abstract parent of every agent.

    What it offers:
    - One shared interface (execute, validate_input, get_output_schema)
    - A per-instance metadata dictionary
    - A per-agent logger plus a small logging helper
    - No timeout handling: long-running operations are allowed
    """

    def __init__(self, name: str, description: str = ""):
        """
        Set up the agent's identity, metadata store and logger.

        Args:
            name: Unique agent identifier (e.g., "plm_orchestrator")
            description: Human-readable description

        Note:
            There is no `model_id` constructor parameter (dropped by
            US-471.11 because it was assigned but never read at dispatch
            time). The runtime model id comes from `Agent.model_id` (DB)
            and is passed to `llm_service.generate_*` as a keyword argument
            at the call site.
        """
        self.name = name
        self.description = description
        self.metadata: Dict[str, Any] = {}
        # Each agent logs under its own "agents.<name>" logger.
        self.logger = logging.getLogger(f"agents.{name}")

    @abstractmethod
    async def execute(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run the agent on the given input.

        Args:
            input_data: Dictionary carrying the fields this agent needs

        Returns:
            Dictionary with the results (shape is agent-specific)

        Note:
            - May take arbitrarily long (no timeout)
            - Is expected to handle errors gracefully
            - Is expected to return a dictionary even on error
              ({"error": str})
        """
        return None

    def validate_input(self, input_data: Dict[str, Any]) -> bool:
        """
        Tell whether input_data is acceptable for this agent.

        Args:
            input_data: Input dictionary to check

        Returns:
            True if valid, False otherwise
        """
        # The base implementation accepts everything; subclasses override.
        return True

    def get_required_fields(self) -> list[str]:
        """
        List the input fields this agent requires.

        Returns:
            Field names required by validate_input() (none by default)
        """
        return list()

    def get_output_schema(self) -> Dict[str, Any]:
        """
        Describe the agent's output as a JSON schema.

        Returns:
            JSON schema for the output; the default is an open object
        """
        return dict(type="object", properties={}, additionalProperties=True)

    def get_metadata(self) -> Dict[str, Any]:
        """Return a shallow copy of the agent metadata (execution stats, etc.)."""
        snapshot = self.metadata.copy()
        return snapshot

    def _log_execution(self, status: str, message: str = "", error: Optional[str] = None):
        """Emit one log line for an execution event; unknown statuses are ignored."""
        log = self.logger
        if status == "start":
            log.info(f"Agent {self.name} starting execution. {message}")
            return
        if status == "success":
            log.info(f"Agent {self.name} completed successfully. {message}")
            return
        if status == "error":
            log.error(f"Agent {self.name} failed: {error}. {message}")
            return
        if status == "warning":
            log.warning(f"Agent {self.name} warning: {message}")
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Domain Classifier Agent
|
|
3
|
+
Classifies document domain (PLM, QMS, IT, generic) and detects RACI matrices.
|
|
4
|
+
Lightweight single LLM call on first 2000 chars of document.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Any
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
from plm_shared.protocols.services.llm import llm_service
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# System prompt handed to `llm_service.generate_structured` by
# `classify_domain`. The four domain identifiers listed here match the `enum`
# of the "domain" property in OUTPUT_SCHEMA below.
CLASSIFIER_PROMPT = """You are a Document Domain Classifier. Analyze the document excerpt and determine:

1. The business domain:
- "plm": Product Lifecycle Management, engineering, manufacturing, CAD/CAM, BOM, ECO, work instructions, production
- "qms": Quality Management System, ISO 9001, document control, audit, compliance, quality policy
- "it": IT processes, ITIL, DevOps, software development, CI/CD, incident management
- "generic": General business process not fitting above categories

2. Whether the document contains a RACI matrix (Responsible, Accountable, Consulted, Informed)

3. Key standards, personas, and systems mentioned in the document

Be precise. Choose the single best-matching domain based on the document's primary topic."""

# JSON schema passed as `output_schema` for the classification call. Only
# domain, domain_label, confidence and has_raci are required; the remaining
# properties are optional.
OUTPUT_SCHEMA = {
    "type": "object",
    "properties": {
        "domain": {"type": "string", "enum": ["plm", "qms", "it", "generic"]},
        "domain_label": {"type": "string", "description": "Human-readable domain label, e.g. 'Quality Management System (ISO 9001)'"},
        "confidence": {"type": "number", "minimum": 0, "maximum": 1},
        "has_raci": {"type": "boolean", "description": "Whether the document contains a RACI matrix"},
        "raci_section": {"type": "string", "description": "Section name/number containing the RACI matrix, or empty if none"},
        "key_standards": {"type": "array", "items": {"type": "string"}, "description": "Standards referenced (e.g. ISO 9001:2015, AS9100)"},
        "key_personas": {"type": "array", "items": {"type": "string"}, "description": "Key roles/personas mentioned"},
        "key_systems": {"type": "array", "items": {"type": "string"}, "description": "Key IT/software systems mentioned"},
    },
    "required": ["domain", "domain_label", "confidence", "has_raci"],
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def classify_domain(document_excerpt: str) -> Dict[str, Any]:
    """
    Classify the domain of a document from its first ~2000 characters.

    At most the first 2000 characters of ``document_excerpt`` are sent to
    ``llm_service.generate_structured`` together with ``CLASSIFIER_PROMPT``
    and ``OUTPUT_SCHEMA``. A short progress summary is printed and the
    structured result is returned unchanged.

    Args:
        document_excerpt: Leading text of the document to classify.

    Returns:
        The dict produced by the LLM service (domain, domain_label,
        confidence, has_raci, key_personas, key_systems, etc.). If the call
        raises, or the reply cannot be read with ``.get``, a fallback dict
        with domain "plm" and confidence 0.0 is returned instead.
    """
    print("[DOMAIN CLASSIFIER] Analyzing document domain...")

    # Only the LLM call and the reads from its reply are guarded: these are
    # the steps whose failure means "classification failed".
    try:
        result = await llm_service.generate_structured(
            system_prompt=CLASSIFIER_PROMPT,
            user_prompt=f"Classify this document:\n\n{document_excerpt[:2000]}",
            output_schema=OUTPUT_SCHEMA,
            temperature=0.1,
        )

        domain = result.get("domain", "generic")
        label = result.get("domain_label", domain)
        confidence = result.get("confidence", 0.5)
        has_raci = result.get("has_raci", False)

    except Exception as e:
        logger.warning(f"Domain classification failed: {e}")
        print(f"[DOMAIN CLASSIFIER] Failed: {e} — defaulting to 'plm'")
        return {
            "domain": "plm",
            "domain_label": "PLM (default fallback)",
            "confidence": 0.0,
            "has_raci": False,
            "raci_section": "",
            "key_standards": [],
            "key_personas": [],
            "key_systems": [],
        }

    # The progress output is diagnostic only. A confidence that is not a
    # number (e.g. null or a string) cannot be rendered with the `%` format;
    # previously that error was caught by the handler above and a successful
    # classification was replaced by the "plm" fallback.
    try:
        confidence_text = f"{confidence:.0%}"
    except (TypeError, ValueError):
        confidence_text = str(confidence)

    print(f"[DOMAIN CLASSIFIER] Domain: {label} (confidence: {confidence_text})")
    if has_raci:
        print(f"[DOMAIN CLASSIFIER] RACI matrix detected in: {result.get('raci_section', '?')}")
    print(f"[DOMAIN CLASSIFIER] Personas: {result.get('key_personas', [])}")
    print(f"[DOMAIN CLASSIFIER] Systems: {result.get('key_systems', [])}")

    return result
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Domain-specific prompt prefixes for PLM extraction agents.
|
|
3
|
+
Injected before existing agent prompts based on document domain classification.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Dict
|
|
7
|
+
|
|
8
|
+
# Prompt prefixes keyed first by domain identifier ("plm", "qms", "it",
# "generic") and then by agent prefix key ("agent1_prefix", "agent2_prefix",
# "agent3_prefix", "agent5_prefix"). Read through `get_domain_prefix`.
# The "plm" entries are intentionally empty strings.
DOMAIN_CONTEXT: Dict[str, Dict[str, str]] = {
    "plm": {
        "agent1_prefix": "",
        "agent2_prefix": "",
        "agent3_prefix": "",
        "agent5_prefix": "",
    },
    "qms": {
        "agent1_prefix": """DOMAIN CONTEXT — QUALITY MANAGEMENT SYSTEM (QMS)
This document describes a QMS process (likely ISO 9001 compliant).
Personas to look for: Document Owner, Reviewer(s), Approver, Quality Manager, Document Control, Users/Stakeholders, Process Owner, Auditor.
Systems to look for: Controlled Document Repository, Master Document List, Document Management System (DMS), ECM, SharePoint, Quality Record Archive.
Sections typically include: Purpose, Scope, Roles & Responsibilities, Detailed Process Description, RACI Matrix, Records, Risk Considerations, Document Control, Version History.
Do NOT default to PLM personas (Configuration Methods Agent, Production Manager) unless explicitly mentioned.
""",
        "agent2_prefix": """DOMAIN CONTEXT — QMS DOCUMENT
This is a QMS document following ISO 9001 structure. Process steps are in numbered sections (e.g., 6.1, 6.2, ..., 6.11).
CRITICAL: Each numbered sub-section (6.1, 6.2, etc.) IS a distinct process step. Do NOT merge them into fewer steps.
Preserve the exact section numbering and titles in the process block names.
Look for: review cycles, approval workflows, document lifecycle states (Draft → Review → Approved → Released → Obsolete).
Assign HIGH confidence (0.9+) to clearly numbered sub-process sections with detailed step descriptions.
""",
        "agent3_prefix": """DOMAIN CONTEXT — QMS PROCESS MODELING
Use QMS-specific personas from the document (Document Owner, Reviewer, Approver, Quality Manager, Users).
Do NOT substitute PLM personas (Configuration Methods Agent, Quality Inspector, Manufacturing Engineer) unless they are explicitly mentioned in the source document.
Decision points in QMS processes include: adequacy review pass/fail, approval granted/rejected, compliance check pass/fail.
Reference applicable standards (ISO 9001, clause numbers) in task descriptions when the source document mentions them.
Do NOT inject PLM-specific terminology (BOM, ECO, FAI, SAP routings) unless explicitly present in the source text.
""",
        "agent5_prefix": """DOMAIN CONTEXT — QMS VALIDATION
This is a QMS process, NOT a PLM/manufacturing process.
Score PERTINENCE based on QMS best practices (ISO 9001 document control, compliance workflows), not PLM methodology.
A well-modeled QMS process IS pertinent — do NOT penalize it for not following PLM patterns (gates, phases, engineering reviews).
Expected keywords for QMS: "document-control", "review-approval", "compliance", "quality-management", "ISO-9001", "release", "obsolescence".
A complete and well-structured QMS process should score 0.8+ on pertinence.
""",
    },
    "it": {
        "agent1_prefix": """DOMAIN CONTEXT — IT / ITSM PROCESS
This document describes an IT or IT Service Management process (possibly ITIL-aligned).
Personas to look for: Service Owner, Incident Manager, Change Manager, Release Manager, Developer, Operations Engineer, End User, CAB (Change Advisory Board).
Systems to look for: ServiceNow, Jira, CI/CD Pipeline, Monitoring tools (Datadog, Grafana), CMDB, Git, Jenkins, Kubernetes.
""",
        "agent2_prefix": """DOMAIN CONTEXT — IT PROCESS DOCUMENT
Look for ITIL-style process flows: incident management, change management, release management, problem management.
Process steps may be organized by ticket lifecycle states, sprint phases, or deployment stages.
Assign HIGH confidence to clearly documented runbooks, playbooks, or SOPs with numbered steps.
""",
        "agent3_prefix": """DOMAIN CONTEXT — IT PROCESS MODELING
Use IT-specific personas and systems from the document.
Decision points include: CAB approval, deployment go/no-go, rollback decisions, SLA breach escalation.
Do NOT inject PLM or manufacturing terminology unless explicitly present in the source.
""",
        "agent5_prefix": """DOMAIN CONTEXT — IT PROCESS VALIDATION
This is an IT process, not PLM or QMS.
Score PERTINENCE based on IT best practices (ITIL, DevOps), not PLM methodology.
Expected keywords: "incident-management", "change-management", "deployment", "monitoring", "SLA", "ITIL".
""",
    },
    # Also used by `get_domain_prefix` for any domain not listed above.
    "generic": {
        "agent1_prefix": """DOMAIN CONTEXT — GENERAL BUSINESS PROCESS
This document describes a general business process. Extract personas, systems, and process steps as described.
Do not assume any specific domain terminology. Use the personas and systems explicitly mentioned in the document.
""",
        "agent2_prefix": """DOMAIN CONTEXT — GENERAL BUSINESS DOCUMENT
Extract process steps as described in the document without domain-specific assumptions.
Numbered sections or bullet lists describing sequential activities are process steps.
""",
        "agent3_prefix": """DOMAIN CONTEXT — GENERAL PROCESS MODELING
Use personas and systems exactly as mentioned in the source document.
Do NOT inject domain-specific terminology (PLM, QMS, ITIL) unless present in the source.
""",
        "agent5_prefix": """DOMAIN CONTEXT — GENERAL PROCESS VALIDATION
This is a general business process. Score PERTINENCE based on general business process quality.
Do NOT penalize for not following PLM-specific methodology.
""",
    },
}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def get_domain_prefix(domain: str, agent_key: str) -> str:
    """Look up the prompt prefix to prepend for one agent in one domain.

    Args:
        domain: Domain identifier (plm, qms, it, generic); any other value
            is treated as "generic"
        agent_key: Agent prefix key (agent1_prefix, agent2_prefix, agent3_prefix, agent5_prefix)

    Returns:
        The prefix text, or an empty string when the agent key is unknown
        (PLM prefixes are empty because the prompts are already PLM-oriented)
    """
    if domain in DOMAIN_CONTEXT:
        prefixes = DOMAIN_CONTEXT[domain]
    else:
        prefixes = DOMAIN_CONTEXT["generic"]
    return prefixes.get(agent_key, "")
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RACI Matrix Extractor Agent
|
|
3
|
+
Extracts RACI assignments from a document when a RACI matrix is detected.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Dict, Any, List
|
|
7
|
+
import logging
|
|
8
|
+
|
|
9
|
+
from plm_shared.protocols.services.llm import llm_service
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
# System prompt handed to `llm_service.generate_structured` by `extract_raci`.
RACI_PROMPT = """You are a RACI Matrix Extraction Specialist. Extract the RACI matrix from the document.

For each sub-process or activity row in the RACI table, extract:
- The sub-process identifier and name (e.g. "6.1 Need Identification")
- The RACI assignments for each role/persona

RACI values:
- "R" = Responsible (does the work)
- "A" = Accountable (owns the outcome, signs off)
- "C" = Consulted (provides input before)
- "I" = Informed (notified after)

For combined values like "A/R", use the PRIMARY role (the first one listed).
If a cell is empty or has no assignment, omit that persona.

Extract EVERY row from the RACI table. Do not skip or merge rows."""

# JSON schema passed as `output_schema` for the extraction call: a required
# "raci_entries" array whose items each carry a "sub_process" string and an
# "assignments" object mapping persona name to one of "R", "A", "C", "I".
OUTPUT_SCHEMA = {
    "type": "object",
    "properties": {
        "raci_entries": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "sub_process": {"type": "string", "description": "Sub-process name, e.g. '6.1 Need Identification'"},
                    "assignments": {
                        "type": "object",
                        "additionalProperties": {"type": "string", "enum": ["R", "A", "C", "I"]},
                        "description": "Mapping of persona name to RACI role",
                    },
                },
                "required": ["sub_process", "assignments"],
            },
        },
    },
    "required": ["raci_entries"],
}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
async def extract_raci(document_text: str) -> Dict[str, Any]:
    """
    Ask the LLM service for the RACI matrix contained in a document.

    The full ``document_text`` is sent with ``RACI_PROMPT`` and
    ``OUTPUT_SCHEMA``; each extracted row is printed as a progress line.

    Returns the service's dict (with a ``raci_entries`` list). Any exception
    raised by the call or while summarizing the entries is logged and
    answered with ``{"raci_entries": []}``.
    """
    print("[RACI EXTRACTOR] Extracting RACI matrix...")

    try:
        request_text = f"Extract the RACI matrix from this document:\n\n{document_text}"
        result = await llm_service.generate_structured(
            system_prompt=RACI_PROMPT,
            user_prompt=request_text,
            output_schema=OUTPUT_SCHEMA,
            temperature=0.1,
        )

        entries = result.get("raci_entries", [])
        print(f"[RACI EXTRACTOR] Extracted {len(entries)} RACI entries")
        for entry in entries:
            pairs = [
                f"{persona}={role}"
                for persona, role in entry.get("assignments", {}).items()
            ]
            roles = ", ".join(pairs)
            print(f" - {entry.get('sub_process', '?')}: {roles}")

    except Exception as e:
        logger.warning(f"RACI extraction failed: {e}")
        print(f"[RACI EXTRACTOR] Failed: {e}")
        return {"raci_entries": []}

    return result
|