ibm-watsonx-orchestrate 1.10.0b0__py3-none-any.whl → 1.10.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_orchestrate/__init__.py +1 -1
- ibm_watsonx_orchestrate/agent_builder/connections/__init__.py +1 -1
- ibm_watsonx_orchestrate/agent_builder/connections/connections.py +1 -1
- ibm_watsonx_orchestrate/agent_builder/connections/types.py +16 -12
- ibm_watsonx_orchestrate/agent_builder/knowledge_bases/types.py +47 -3
- ibm_watsonx_orchestrate/agent_builder/toolkits/types.py +18 -15
- ibm_watsonx_orchestrate/agent_builder/tools/types.py +1 -1
- ibm_watsonx_orchestrate/cli/commands/connections/connections_command.py +7 -7
- ibm_watsonx_orchestrate/cli/commands/connections/connections_controller.py +36 -26
- ibm_watsonx_orchestrate/cli/commands/knowledge_bases/knowledge_bases_controller.py +32 -10
- ibm_watsonx_orchestrate/cli/commands/server/server_command.py +95 -14
- ibm_watsonx_orchestrate/cli/commands/toolkit/toolkit_command.py +43 -10
- ibm_watsonx_orchestrate/cli/commands/toolkit/toolkit_controller.py +52 -25
- ibm_watsonx_orchestrate/client/connections/connections_client.py +4 -3
- ibm_watsonx_orchestrate/client/knowledge_bases/knowledge_base_client.py +4 -4
- ibm_watsonx_orchestrate/docker/compose-lite.yml +48 -13
- ibm_watsonx_orchestrate/docker/default.env +18 -13
- ibm_watsonx_orchestrate/flow_builder/flows/__init__.py +2 -0
- ibm_watsonx_orchestrate/flow_builder/flows/flow.py +91 -12
- ibm_watsonx_orchestrate/flow_builder/node.py +39 -15
- ibm_watsonx_orchestrate/flow_builder/types.py +114 -25
- ibm_watsonx_orchestrate/run/connections.py +2 -2
- {ibm_watsonx_orchestrate-1.10.0b0.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/METADATA +1 -1
- {ibm_watsonx_orchestrate-1.10.0b0.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/RECORD +27 -27
- {ibm_watsonx_orchestrate-1.10.0b0.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/WHEEL +0 -0
- {ibm_watsonx_orchestrate-1.10.0b0.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/entry_points.txt +0 -0
- {ibm_watsonx_orchestrate-1.10.0b0.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/licenses/LICENSE +0 -0
@@ -57,13 +57,14 @@ EVENT_BROKER_TTL="3600"
|
|
57
57
|
REGISTRY_URL=
|
58
58
|
|
59
59
|
|
60
|
-
|
60
|
+
|
61
|
+
SERVER_TAG=20-08-2025-cf6e342
|
61
62
|
SERVER_REGISTRY=
|
62
63
|
|
63
|
-
WORKER_TAG=
|
64
|
+
WORKER_TAG=20-08-2025-cf6e342
|
64
65
|
WORKER_REGISTRY=
|
65
66
|
|
66
|
-
AI_GATEWAY_TAG=
|
67
|
+
AI_GATEWAY_TAG=20-08-2025-9ed6d40
|
67
68
|
AI_GATEWAY_REGISTRY=
|
68
69
|
|
69
70
|
AGENT_GATEWAY_TAG=29-07-2025
|
@@ -82,16 +83,16 @@ UITAG=31-07-2025
|
|
82
83
|
CM_REGISTRY=
|
83
84
|
CM_TAG=24-07-2025
|
84
85
|
|
85
|
-
TRM_TAG=
|
86
|
+
TRM_TAG=19-08-2025-fe105eb0b950ff304f712a1a5b9fa3cba92d09da
|
86
87
|
TRM_REGISTRY=
|
87
88
|
|
88
|
-
TR_TAG=
|
89
|
+
TR_TAG=19-08-2025-fe105eb0b950ff304f712a1a5b9fa3cba92d09da
|
89
90
|
TR_REGISTRY=
|
90
91
|
|
91
|
-
BUILDER_TAG=
|
92
|
+
BUILDER_TAG=19-08-2025-1a79d34
|
92
93
|
BUILDER_REGISTRY=
|
93
94
|
|
94
|
-
FLOW_RUNTIME_TAG=
|
95
|
+
FLOW_RUNTIME_TAG=18-08-2025-v2
|
95
96
|
FLOW_RUMTIME_REGISTRY=
|
96
97
|
|
97
98
|
|
@@ -104,20 +105,23 @@ JAEGER_PROXY_REGISTRY=
|
|
104
105
|
SOCKET_HANDLER_TAG=29-05-2025
|
105
106
|
SOCKET_HANDLER_REGISTRY=
|
106
107
|
|
107
|
-
CPE_TAG=
|
108
|
+
CPE_TAG=18-08-2025-ae1308e
|
108
109
|
CPE_REGISTRY=
|
109
110
|
|
110
111
|
VOICE_CONTROLLER_TAG=12-08-2025
|
111
112
|
VOICE_CONTROLLER_REGISTRY=
|
112
113
|
|
114
|
+
LANGFLOW_TAG=
|
115
|
+
LANGFLOW_IMAGE=
|
116
|
+
|
113
117
|
# IBM Document Processing
|
114
|
-
WDU_TAG=2.
|
118
|
+
WDU_TAG=2.6.1
|
115
119
|
WDU_REGISTRY=
|
116
120
|
|
117
|
-
DOCPROC_DPS_TAG=
|
118
|
-
DOCPROC_LLMSERVICE_TAG=
|
119
|
-
DOCPROC_CACHE_TAG=
|
120
|
-
DOCPROC_DPI_TAG=
|
121
|
+
DOCPROC_DPS_TAG=20250815-010747-277-173db2a
|
122
|
+
DOCPROC_LLMSERVICE_TAG=20250820-153924-128-55cf4d5
|
123
|
+
DOCPROC_CACHE_TAG=20250814-master-82-cf33f87
|
124
|
+
DOCPROC_DPI_TAG=20250815-004755-273-e65f26b4
|
121
125
|
DOCPROC_REGISTRY=
|
122
126
|
|
123
127
|
# END -- IMAGE REGISTRIES AND TAGS
|
@@ -152,6 +156,7 @@ TOOLS_RUNTIME_MANAGER_BASE_URL="http://tools-runtime-manager:8080"
|
|
152
156
|
CONNECTION_SERVICE_BASE_URL="http://wxo-server-connection-manager:3001"
|
153
157
|
AI_GATEWAY_BASE_URL="http://ai-gateway:8787/v1"
|
154
158
|
AI_GATEWAY_ENABLED=True
|
159
|
+
DEFAULT_AGENT_ENABLED=True
|
155
160
|
AGENT_GATEWAY_URI="http://wxo-agent-gateway:8989"
|
156
161
|
DEFAULT_TENANT_ID=10000000-0000-0000-0000-000000000000
|
157
162
|
ES_USERNAME=elastic
|
@@ -25,13 +25,13 @@ from ibm_watsonx_orchestrate.client.tools.tool_client import ToolClient
|
|
25
25
|
from ibm_watsonx_orchestrate.client.tools.tempus_client import TempusClient
|
26
26
|
from ibm_watsonx_orchestrate.client.utils import instantiate_client
|
27
27
|
from ..types import (
|
28
|
-
EndNodeSpec, Expression, ForeachPolicy, ForeachSpec, LoopSpec, BranchNodeSpec, MatchPolicy, PromptLLMParameters, PromptNodeSpec,
|
28
|
+
DocProcKVPSchema, EndNodeSpec, Expression, ForeachPolicy, ForeachSpec, LoopSpec, BranchNodeSpec, MatchPolicy, PlainTextReadingOrder, PromptLLMParameters, PromptNodeSpec, TimerNodeSpec,
|
29
29
|
StartNodeSpec, ToolSpec, JsonSchemaObject, ToolRequestBody, ToolResponseBody, UserFieldKind, UserFieldOption, UserFlowSpec, UserNodeSpec, WaitPolicy,
|
30
|
-
DocProcSpec, TextExtractionResponse, DocProcInput, DecisionsNodeSpec, DecisionsRule, DocExtSpec, File
|
30
|
+
DocProcSpec, TextExtractionResponse, DocProcInput, DecisionsNodeSpec, DecisionsRule, DocExtSpec, File, DocumentClassificationResponse, DocClassifierSpec, DocumentProcessingCommonInput
|
31
31
|
)
|
32
32
|
from .constants import CURRENT_USER, START, END, ANY_USER
|
33
33
|
from ..node import (
|
34
|
-
EndNode, Node, PromptNode, StartNode, UserNode, AgentNode, DataMap, ToolNode, DocProcNode, DecisionsNode, DocExtNode
|
34
|
+
EndNode, Node, PromptNode, StartNode, UserNode, AgentNode, DataMap, ToolNode, DocProcNode, DecisionsNode, DocExtNode, DocClassifierNode
|
35
35
|
)
|
36
36
|
from ..types import (
|
37
37
|
AgentNodeSpec, extract_node_spec, FlowContext, FlowEventType, FlowEvent, FlowSpec,
|
@@ -438,23 +438,95 @@ class Flow(Node):
|
|
438
438
|
node = self._add_node(node)
|
439
439
|
return cast(PromptNode, node)
|
440
440
|
|
441
|
+
def docclassfier(self,
|
442
|
+
name: str,
|
443
|
+
llm : str = "watsonx/meta-llama/llama-3-2-90b-vision-instruct",
|
444
|
+
version: str = "TIP",
|
445
|
+
display_name: str| None = None,
|
446
|
+
classes: type[BaseModel]| None = None,
|
447
|
+
description: str | None = None,
|
448
|
+
min_confidence: float = 0.0,
|
449
|
+
input_map: DataMap = None) -> DocClassifierNode:
|
450
|
+
|
451
|
+
if name is None :
|
452
|
+
raise ValueError("name must be provided.")
|
453
|
+
|
454
|
+
doc_classifier_config = DocClassifierNode.generate_config(llm=llm, min_confidence=min_confidence,input_classes=classes)
|
455
|
+
|
456
|
+
input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = DocumentProcessingCommonInput)
|
457
|
+
output_schema_obj = _get_json_schema_obj(parameter_name = "output", type_def = DocumentClassificationResponse)
|
458
|
+
|
459
|
+
if "$defs" in output_schema_obj.model_extra:
|
460
|
+
output_schema_obj.model_extra.pop("$defs")
|
461
|
+
# Create the docclassifier spec
|
462
|
+
task_spec = DocClassifierSpec(
|
463
|
+
name=name,
|
464
|
+
display_name=display_name if display_name is not None else name,
|
465
|
+
description=description,
|
466
|
+
input_schema=_get_tool_request_body(input_schema_obj),
|
467
|
+
output_schema=_get_tool_response_body(output_schema_obj),
|
468
|
+
output_schema_object = output_schema_obj,
|
469
|
+
config=doc_classifier_config,
|
470
|
+
version=version
|
471
|
+
)
|
472
|
+
node = DocClassifierNode(spec=task_spec)
|
473
|
+
|
474
|
+
# setup input map
|
475
|
+
if input_map:
|
476
|
+
node.input_map = self._get_data_map(input_map)
|
477
|
+
|
478
|
+
# add the node to the list of node
|
479
|
+
|
480
|
+
node = self._add_node(node)
|
481
|
+
return cast(DocClassifierNode, node)
|
482
|
+
|
483
|
+
|
484
|
+
def timer(self,
|
485
|
+
name: str,
|
486
|
+
delay: int,
|
487
|
+
display_name: str | None = None,
|
488
|
+
description: str | None = None,
|
489
|
+
input_map: DataMap = None) -> Node:
|
490
|
+
|
491
|
+
if name is None:
|
492
|
+
raise ValueError("name must be provided.")
|
493
|
+
if delay < 0:
|
494
|
+
raise ValueError("delay must be non-negative.")
|
495
|
+
|
496
|
+
timer_spec = TimerNodeSpec(
|
497
|
+
name=name,
|
498
|
+
display_name=display_name if display_name is not None else name,
|
499
|
+
description=description,
|
500
|
+
delay=delay
|
501
|
+
)
|
502
|
+
|
503
|
+
node = Node(spec=timer_spec)
|
504
|
+
|
505
|
+
if input_map:
|
506
|
+
node.input_map = self._get_data_map(input_map)
|
507
|
+
|
508
|
+
node = self._add_node(node)
|
509
|
+
return node
|
510
|
+
|
511
|
+
|
441
512
|
def docext(self,
|
442
513
|
name: str,
|
443
|
-
llm : str = "meta-llama/llama-3-2-
|
514
|
+
llm : str = "watsonx/meta-llama/llama-3-2-90b-vision-instruct",
|
444
515
|
version: str = "TIP",
|
445
516
|
display_name: str| None = None,
|
446
|
-
|
517
|
+
fields: type[BaseModel]| None = None,
|
447
518
|
description: str | None = None,
|
448
|
-
input_map: DataMap = None
|
519
|
+
input_map: DataMap = None,
|
520
|
+
enable_hw: bool = False) -> tuple[DocExtNode, type[BaseModel]]:
|
449
521
|
|
450
522
|
if name is None :
|
451
523
|
raise ValueError("name must be provided.")
|
452
524
|
|
453
|
-
doc_ext_config = DocExtNode.generate_config(llm=llm,
|
525
|
+
doc_ext_config = DocExtNode.generate_config(llm=llm, fields=fields)
|
454
526
|
|
455
|
-
DocExtFieldValue = DocExtNode.generate_docext_field_value_model(
|
527
|
+
DocExtFieldValue = DocExtNode.generate_docext_field_value_model(fields=fields)
|
456
528
|
|
457
|
-
input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def =
|
529
|
+
input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = DocumentProcessingCommonInput)
|
458
530
|
output_schema_obj = _get_json_schema_obj("output", DocExtFieldValue)
|
459
531
|
|
460
532
|
if "$defs" in output_schema_obj.model_extra:
|
@@ -469,7 +541,8 @@ class Flow(Node):
|
|
469
541
|
output_schema=_get_tool_response_body(output_schema_obj),
|
470
542
|
output_schema_object = output_schema_obj,
|
471
543
|
config=doc_ext_config,
|
472
|
-
version=version
|
544
|
+
version=version,
|
545
|
+
enable_hw=enable_hw
|
473
546
|
)
|
474
547
|
node = DocExtNode(spec=task_spec)
|
475
548
|
|
@@ -528,9 +601,12 @@ class Flow(Node):
|
|
528
601
|
def docproc(self,
|
529
602
|
name: str,
|
530
603
|
task: str,
|
604
|
+
plain_text_reading_order : PlainTextReadingOrder = PlainTextReadingOrder.block_structure,
|
531
605
|
display_name: str|None=None,
|
532
606
|
description: str | None = None,
|
533
|
-
input_map: DataMap = None
|
607
|
+
input_map: DataMap = None,
|
608
|
+
kvp_schemas: list[DocProcKVPSchema] = None,
|
609
|
+
enable_hw: bool = False) -> DocProcNode:
|
534
610
|
|
535
611
|
if name is None :
|
536
612
|
raise ValueError("name must be provided.")
|
@@ -552,7 +628,10 @@ class Flow(Node):
|
|
552
628
|
input_schema=_get_tool_request_body(input_schema_obj),
|
553
629
|
output_schema=_get_tool_response_body(output_schema_obj),
|
554
630
|
output_schema_object = output_schema_obj,
|
555
|
-
task=task
|
631
|
+
task=task,
|
632
|
+
plain_text_reading_order=plain_text_reading_order,
|
633
|
+
enable_hw=enable_hw,
|
634
|
+
kvp_schemas=kvp_schemas
|
556
635
|
)
|
557
636
|
|
558
637
|
node = DocProcNode(spec=task_spec)
|
@@ -6,8 +6,8 @@ import yaml
|
|
6
6
|
from pydantic import BaseModel, Field, SerializeAsAny, create_model
|
7
7
|
from enum import Enum
|
8
8
|
|
9
|
-
from .types import EndNodeSpec, NodeSpec, AgentNodeSpec, PromptNodeSpec, StartNodeSpec, ToolNodeSpec, UserFieldKind, UserFieldOption, UserNodeSpec, DocProcSpec, \
|
10
|
-
DocExtSpec, DocExtConfig,
|
9
|
+
from .types import DocExtConfigField, EndNodeSpec, NodeSpec, AgentNodeSpec, PromptNodeSpec, TimerNodeSpec, StartNodeSpec, ToolNodeSpec, UserFieldKind, UserFieldOption, UserNodeSpec, DocProcSpec, \
|
10
|
+
DocExtSpec, DocExtConfig, DocClassifierSpec, DecisionsNodeSpec, DocClassifierConfig
|
11
11
|
|
12
12
|
from .data_map import DataMap
|
13
13
|
|
@@ -120,6 +120,24 @@ class DocProcNode(Node):
|
|
120
120
|
def get_spec(self) -> DocProcSpec:
|
121
121
|
return cast(DocProcSpec, self.spec)
|
122
122
|
|
123
|
+
class DocClassifierNode(Node):
|
124
|
+
def __repr__(self):
|
125
|
+
return f"DocClassifierNode(name='{self.spec.name}', description='{self.spec.description}')"
|
126
|
+
|
127
|
+
def get_spec(self) -> DocClassifierSpec:
|
128
|
+
return cast(DocClassifierSpec, self.spec)
|
129
|
+
|
130
|
+
@staticmethod
|
131
|
+
def generate_config(llm: str, input_classes: type[BaseModel], min_confidence: float) -> DocClassifierConfig:
|
132
|
+
return DocClassifierConfig(llm=llm, classes=input_classes.__dict__.values(), min_confidence=min_confidence)
|
133
|
+
|
134
|
+
class TimerNode(Node):
|
135
|
+
def __repr__(self):
|
136
|
+
return f"TimerNode(name='{self.spec.name}', description='{self.spec.description}')"
|
137
|
+
|
138
|
+
def get_spec(self) -> TimerNodeSpec:
|
139
|
+
return cast(TimerNodeSpec, self.spec)
|
140
|
+
|
123
141
|
class DocExtNode(Node):
|
124
142
|
def __repr__(self):
|
125
143
|
return f"DocExtNode(name='{self.spec.name}', description='{self.spec.description}')"
|
@@ -128,23 +146,29 @@ class DocExtNode(Node):
|
|
128
146
|
return cast(DocExtSpec, self.spec)
|
129
147
|
|
130
148
|
@staticmethod
|
131
|
-
def generate_config(llm: str,
|
132
|
-
|
133
|
-
return DocExtConfig(llm=llm, entities=entities)
|
149
|
+
def generate_config(llm: str, fields: type[BaseModel]) -> DocExtConfig:
|
150
|
+
return DocExtConfig(llm=llm, fields=fields.__dict__.values())
|
134
151
|
|
135
152
|
@staticmethod
|
136
|
-
def generate_docext_field_value_model(
|
153
|
+
def generate_docext_field_value_model(fields: type[BaseModel]) -> type[BaseModel]:
|
137
154
|
create_field_value_description = lambda field_name: "Extracted value for " + field_name
|
155
|
+
field_definitions = {}
|
156
|
+
|
157
|
+
for name, value in fields.model_dump().items():
|
158
|
+
field_type = str
|
159
|
+
field_kwargs = {
|
160
|
+
"title": value['name'],
|
161
|
+
"description": create_field_value_description(value['name']),
|
162
|
+
"type": value["type"] if value["type"] != "date" else "string"
|
163
|
+
}
|
164
|
+
|
165
|
+
# Add json_schema_extra if type is 'date'
|
166
|
+
if value["type"] == "date":
|
167
|
+
field_kwargs["json_schema_extra"] = {"format": "date"}
|
168
|
+
|
169
|
+
field_definitions[name] = (field_type, Field(**field_kwargs))
|
138
170
|
|
139
|
-
DocExtFieldValue = create_model(
|
140
|
-
"DocExtFieldValue",
|
141
|
-
**{
|
142
|
-
name: (str, Field(
|
143
|
-
title=value['name'],
|
144
|
-
description=create_field_value_description(value['name']),
|
145
|
-
)
|
146
|
-
)
|
147
|
-
for name, value in input_entities.model_dump().items()})
|
171
|
+
DocExtFieldValue = create_model("DocExtFieldValue", **field_definitions)
|
148
172
|
return DocExtFieldValue
|
149
173
|
|
150
174
|
class DecisionsNode(Node):
|
@@ -4,12 +4,16 @@ from datetime import date
|
|
4
4
|
import numbers
|
5
5
|
import inspect
|
6
6
|
import logging
|
7
|
+
import uuid
|
8
|
+
import re
|
9
|
+
import time
|
7
10
|
from typing import (
|
8
11
|
Annotated, Any, Callable, Self, cast, Literal, List, NamedTuple, Optional, Sequence, Union, NewType
|
9
12
|
)
|
10
13
|
from typing_extensions import Doc
|
11
14
|
|
12
15
|
import docstring_parser
|
16
|
+
from pydantic import computed_field, field_validator
|
13
17
|
from pydantic import BaseModel, Field, GetCoreSchemaHandler, GetJsonSchemaHandler, RootModel
|
14
18
|
from pydantic_core import core_schema
|
15
19
|
from pydantic.json_schema import JsonSchemaValue
|
@@ -124,7 +128,7 @@ def _to_json_from_output_schema(schema: Union[ToolResponseBody, SchemaRef]) -> d
|
|
124
128
|
return model_spec
|
125
129
|
|
126
130
|
class NodeSpec(BaseModel):
|
127
|
-
kind: Literal["node", "tool", "user", "agent", "flow", "start", "decisions", "prompt", "branch", "wait", "foreach", "loop", "userflow", "end", "docproc" ] = "node"
|
131
|
+
kind: Literal["node", "tool", "user", "agent", "flow", "start", "decisions", "prompt", "timer", "branch", "wait", "foreach", "loop", "userflow", "end", "docproc", "docext", "docclassifier" ] = "node"
|
128
132
|
name: str
|
129
133
|
display_name: str | None = None
|
130
134
|
description: str | None = None
|
@@ -170,7 +174,7 @@ class NodeSpec(BaseModel):
|
|
170
174
|
|
171
175
|
return model_spec
|
172
176
|
|
173
|
-
class
|
177
|
+
class DocExtConfigField(BaseModel):
|
174
178
|
name: str = Field(description="Entity name")
|
175
179
|
type: Literal["string", "date", "number"] = Field(default="string", description="The type of the entity values")
|
176
180
|
description: str = Field(title="Description", description="Description of the entity", default="")
|
@@ -180,18 +184,89 @@ class DocExtConfigEntity(BaseModel):
|
|
180
184
|
examples: list[str] = Field(title="Examples", description="Examples that help the LLM understand the expected entity mentions", default=[])
|
181
185
|
|
182
186
|
class DocExtConfig(BaseModel):
|
183
|
-
domain: str = Field(description="
|
187
|
+
domain: str = Field(description="Domain of the document", default="other")
|
184
188
|
type: str = Field(description="Document type", default="agreement")
|
185
189
|
llm: str = Field(description="The LLM used for the document extraction", default="meta-llama/llama-3-2-11b-vision-instruct")
|
186
|
-
|
190
|
+
fields: list[DocExtConfigField] = Field(default=[])
|
187
191
|
|
188
192
|
class LanguageCode(StrEnum):
|
189
193
|
en = auto()
|
190
194
|
fr = auto()
|
191
195
|
|
196
|
+
class DocProcTask(StrEnum):
|
197
|
+
'''
|
198
|
+
Possible names for the Document processing task parameter
|
199
|
+
'''
|
200
|
+
text_extraction = auto()
|
201
|
+
custom_field_extraction = auto()
|
202
|
+
custom_document_classification = auto()
|
203
|
+
|
204
|
+
class CustomClassOutput(BaseModel):
|
205
|
+
class_name: str = Field(
|
206
|
+
title="Class Name",
|
207
|
+
description="Class Name of the Document",
|
208
|
+
default=[],
|
209
|
+
)
|
210
|
+
|
211
|
+
class DocumentClassificationResponse(BaseModel):
|
212
|
+
custom_class_response: CustomClassOutput = Field(
|
213
|
+
title="Custom Classification",
|
214
|
+
description="The Class extracted by the llm",
|
215
|
+
)
|
216
|
+
|
217
|
+
class DocClassifierClass(BaseModel):
|
218
|
+
class_name: str = Field(title='Class Name', description="The predicted, normalized document class name based on provided name")
|
219
|
+
|
220
|
+
@field_validator("class_name", mode="before")
|
221
|
+
@classmethod
|
222
|
+
def normalize_name(cls, name) -> str:
|
223
|
+
pattern = r'^[a-zA-Z0-9_]{1,29}$'
|
224
|
+
if not re.match(pattern, name):
|
225
|
+
raise ValueError(f"class_name \"{name}\" is not valid. class_name should contain only letters (a-z, A-Z), digits (0-9), and underscores (_)")
|
226
|
+
return name
|
227
|
+
|
228
|
+
@computed_field(description="A uuid for identifying classes, For easy filtering of documents classified in a class", return_type=str)
|
229
|
+
def class_id(self) -> str:
|
230
|
+
return str(uuid.uuid5(uuid.uuid1(), self.class_name + str(time.time())))
|
231
|
+
|
232
|
+
class DocClassifierConfig(BaseModel):
|
233
|
+
domain: str = Field(description="Domain of the document", default="other",title="Domain")
|
234
|
+
type: Literal["class_configuration"] = Field(description="Document type", default="class_configuration",title="Type")
|
235
|
+
llm: str = Field(description="The LLM used for the document classfier", default="watsonx/meta-llama/llama-3-2-11b-vision-instruct",title="LLM")
|
236
|
+
min_confidence: float = Field(description="The minimal confidence acceptable for an extracted field value", default=0.0,le=1.0, ge=0.0 ,title="Minimum Confidence")
|
237
|
+
classes: list[DocClassifierClass] = Field(default=[], description="Classes which are needed to classify provided by user", title="Classes")
|
238
|
+
|
192
239
|
class DocProcCommonNodeSpec(NodeSpec):
|
240
|
+
task: DocProcTask = Field(description='The document processing operation name', default=DocProcTask.text_extraction)
|
193
241
|
enable_hw: bool | None = Field(description="Boolean value indicating if hand-written feature is enabled.", title="Enable handwritten", default=False)
|
194
242
|
|
243
|
+
def __init__(self, **data):
|
244
|
+
super().__init__(**data)
|
245
|
+
|
246
|
+
def to_json(self) -> dict[str, Any]:
|
247
|
+
model_spec = super().to_json()
|
248
|
+
model_spec["task"] = self.task
|
249
|
+
model_spec["enable_hw"] = self.enable_hw
|
250
|
+
|
251
|
+
return model_spec
|
252
|
+
|
253
|
+
|
254
|
+
|
255
|
+
class DocClassifierSpec(DocProcCommonNodeSpec):
|
256
|
+
version : str = Field(description="A version of the spec")
|
257
|
+
config : DocClassifierConfig
|
258
|
+
|
259
|
+
def __init__(self, **data):
|
260
|
+
super().__init__(**data)
|
261
|
+
self.kind = "docclassifier"
|
262
|
+
|
263
|
+
def to_json(self) -> dict[str, Any]:
|
264
|
+
model_spec = super().to_json()
|
265
|
+
model_spec["version"] = self.version
|
266
|
+
model_spec["config"] = self.config.model_dump()
|
267
|
+
model_spec["task"] = DocProcTask.custom_document_classification
|
268
|
+
return model_spec
|
269
|
+
|
195
270
|
class DocExtSpec(DocProcCommonNodeSpec):
|
196
271
|
version : str = Field(description="A version of the spec")
|
197
272
|
config : DocExtConfig
|
@@ -204,6 +279,7 @@ class DocExtSpec(DocProcCommonNodeSpec):
|
|
204
279
|
model_spec = super().to_json()
|
205
280
|
model_spec["version"] = self.version
|
206
281
|
model_spec["config"] = self.config.model_dump()
|
282
|
+
model_spec["task"] = DocProcTask.custom_field_extraction
|
207
283
|
return model_spec
|
208
284
|
|
209
285
|
class DocProcField(BaseModel):
|
@@ -251,19 +327,17 @@ class DocProcKVP(BaseModel):
|
|
251
327
|
table_name: Optional[str] = Field(default=None, description="The name of the table the key-value pair belongs to. This is used to group key-value pairs together in a table.")
|
252
328
|
table_row_index: Optional[int] = Field(default=None, description="The index of the row in the table the key-value pair belongs to. This is used to group key-value pairs together in a table.")
|
253
329
|
|
254
|
-
class
|
255
|
-
|
256
|
-
|
257
|
-
'''
|
258
|
-
text_extraction = auto()
|
330
|
+
class PlainTextReadingOrder(StrEnum):
|
331
|
+
block_structure = auto()
|
332
|
+
simple_line = auto()
|
259
333
|
|
260
334
|
class DocProcSpec(DocProcCommonNodeSpec):
|
261
|
-
|
262
|
-
kvp_schema: List[DocProcKVPSchema] | None = Field(
|
335
|
+
kvp_schemas: List[DocProcKVPSchema] | None = Field(
|
263
336
|
title='KVP schemas',
|
264
337
|
description="Optional list of key-value pair schemas to use for extraction.",
|
265
338
|
default=None)
|
266
|
-
|
339
|
+
plain_text_reading_order : PlainTextReadingOrder = Field(default=PlainTextReadingOrder.block_structure)
|
340
|
+
|
267
341
|
def __init__(self, **data):
|
268
342
|
super().__init__(**data)
|
269
343
|
self.kind = "docproc"
|
@@ -271,8 +345,12 @@ class DocProcSpec(DocProcCommonNodeSpec):
|
|
271
345
|
def to_json(self) -> dict[str, Any]:
|
272
346
|
model_spec = super().to_json()
|
273
347
|
model_spec["task"] = self.task
|
348
|
+
if self.plain_text_reading_order != PlainTextReadingOrder.block_structure:
|
349
|
+
model_spec["plain_text_reading_order"] = self.plain_text_reading_order
|
350
|
+
if self.kvp_schemas is not None:
|
351
|
+
model_spec["kvp_schemas"] = self.kvp_schemas
|
274
352
|
return model_spec
|
275
|
-
|
353
|
+
|
276
354
|
class StartNodeSpec(NodeSpec):
|
277
355
|
def __init__(self, **data):
|
278
356
|
super().__init__(**data)
|
@@ -607,6 +685,18 @@ class PromptNodeSpec(NodeSpec):
|
|
607
685
|
|
608
686
|
return model_spec
|
609
687
|
|
688
|
+
class TimerNodeSpec(NodeSpec):
|
689
|
+
delay: int
|
690
|
+
|
691
|
+
def __init__(self, **kwargs):
|
692
|
+
super().__init__(**kwargs)
|
693
|
+
self.kind = "timer"
|
694
|
+
|
695
|
+
def to_json(self) -> dict[str, Any]:
|
696
|
+
model_spec = super().to_json()
|
697
|
+
if self.delay:
|
698
|
+
model_spec["delay"] = self.delay
|
699
|
+
return model_spec
|
610
700
|
|
611
701
|
class Expression(BaseModel):
|
612
702
|
'''An expression could return a boolean or a value'''
|
@@ -866,24 +956,25 @@ class File(str):
|
|
866
956
|
"wrap_data": False,
|
867
957
|
"required": []
|
868
958
|
}
|
869
|
-
|
870
|
-
|
871
|
-
|
959
|
+
class DocumentProcessingCommonInput(BaseModel):
|
960
|
+
'''
|
961
|
+
This class represents the common input of docext, docproc and docclassifier node
|
872
962
|
|
963
|
+
Attributes:
|
964
|
+
document_ref (bytes|str): This is either a URL to the location of the document bytes or an ID that we use to resolve the location of the document
|
965
|
+
'''
|
966
|
+
document_ref: bytes | File = Field(description="Either an ID or a URL identifying the document to be used.", title='Document reference', default=None, json_schema_extra={"format": "binary"})
|
873
967
|
|
874
|
-
class DocProcInput(
|
968
|
+
class DocProcInput(DocumentProcessingCommonInput):
|
875
969
|
'''
|
876
970
|
This class represents the input of a Document processing task.
|
877
971
|
|
878
972
|
Attributes:
|
879
|
-
document_ref (bytes|str): This is either a URL to the location of the document bytes or an ID that we use to resolve the location of the document
|
880
|
-
language (LanguageCode): Optional language code used when processing the input document
|
881
973
|
kvp_schemas (List[DocProcKVPSchema]): Optional list of key-value pair schemas to use for extraction. If not provided or None, no KVPs will be extracted. If an empty list is provided, we will use the internal schemas to extract KVPS.
|
882
974
|
'''
|
883
975
|
# This is declared as bytes but the runtime will understand if a URL is send in as input.
|
884
976
|
# We need to use bytes here for Chat-with-doc to recognize the input as a File.
|
885
|
-
|
886
|
-
kvp_schemas: Optional[List[DocProcKVPSchema]] = Field(
|
977
|
+
kvp_schemas: Optional[List[DocProcKVPSchema]] | str = Field(
|
887
978
|
title='KVP schemas',
|
888
979
|
description="Optional list of key-value pair schemas to use for extraction.",
|
889
980
|
default=None)
|
@@ -892,11 +983,9 @@ class TextExtractionResponse(BaseModel):
|
|
892
983
|
'''
|
893
984
|
The text extraction operation response.
|
894
985
|
Attributes:
|
895
|
-
|
896
|
-
kvps (Optional[list[DocProcKVP]]): A list of key-value pairs extracted from the document. If no KVPs were extracted, this will be None.
|
986
|
+
output_file_ref (str): The url to the file that contains the extracted text and kvps.
|
897
987
|
'''
|
898
|
-
|
899
|
-
kvps: Optional[list[DocProcKVP]] = Field(description="A list of key-value pairs extracted from the document.", default=None)
|
988
|
+
output_file_ref: str = Field(description='The url to the file that contains the extracted text and kvps.', title="output_file_ref")
|
900
989
|
|
901
990
|
|
902
991
|
class DecisionsCondition(BaseModel):
|
@@ -24,8 +24,8 @@ def oauth2_auth_code(app_id:str) -> OAuth2TokenCredentials:
|
|
24
24
|
# def oauth2_implicit(app_id:str) -> BearerTokenAuthCredentials:
|
25
25
|
# return get_application_connection_credentials(ConnectionType.OAUTH2_IMPLICIT, app_id=app_id)
|
26
26
|
|
27
|
-
|
28
|
-
|
27
|
+
def oauth2_password(app_id:str) -> OAuth2TokenCredentials:
|
28
|
+
return get_application_connection_credentials(ConnectionType.OAUTH2_PASSWORD, app_id=app_id)
|
29
29
|
|
30
30
|
def oauth2_client_creds(app_id:str) -> OAuth2TokenCredentials:
|
31
31
|
return get_application_connection_credentials(ConnectionType.OAUTH2_CLIENT_CREDS, app_id=app_id)
|