ibm-watsonx-orchestrate 1.8.1__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ibm_watsonx_orchestrate/__init__.py +1 -2
- ibm_watsonx_orchestrate/agent_builder/knowledge_bases/types.py +2 -2
- ibm_watsonx_orchestrate/agent_builder/models/types.py +5 -0
- ibm_watsonx_orchestrate/agent_builder/tools/python_tool.py +18 -6
- ibm_watsonx_orchestrate/agent_builder/tools/types.py +5 -3
- ibm_watsonx_orchestrate/cli/commands/agents/agents_controller.py +15 -3
- ibm_watsonx_orchestrate/cli/commands/connections/connections_controller.py +6 -3
- ibm_watsonx_orchestrate/cli/commands/copilot/copilot_controller.py +103 -23
- ibm_watsonx_orchestrate/cli/commands/models/model_provider_mapper.py +17 -13
- ibm_watsonx_orchestrate/cli/commands/server/server_command.py +146 -36
- ibm_watsonx_orchestrate/cli/commands/toolkit/toolkit_command.py +4 -2
- ibm_watsonx_orchestrate/cli/commands/toolkit/toolkit_controller.py +9 -1
- ibm_watsonx_orchestrate/cli/commands/tools/tools_controller.py +1 -1
- ibm_watsonx_orchestrate/client/connections/connections_client.py +14 -2
- ibm_watsonx_orchestrate/client/copilot/cpe/copilot_cpe_client.py +5 -3
- ibm_watsonx_orchestrate/docker/compose-lite.yml +124 -9
- ibm_watsonx_orchestrate/docker/default.env +22 -17
- ibm_watsonx_orchestrate/flow_builder/flows/__init__.py +2 -2
- ibm_watsonx_orchestrate/flow_builder/flows/constants.py +2 -0
- ibm_watsonx_orchestrate/flow_builder/flows/flow.py +52 -10
- ibm_watsonx_orchestrate/flow_builder/node.py +34 -3
- ibm_watsonx_orchestrate/flow_builder/types.py +144 -25
- ibm_watsonx_orchestrate/flow_builder/utils.py +7 -4
- {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/METADATA +1 -1
- {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/RECORD +28 -28
- {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/WHEEL +0 -0
- {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/entry_points.txt +0 -0
- {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,7 @@
|
|
1
|
+
# These credentials are for local development only.
|
2
|
+
# They are default values and can be overridden by the user.
|
3
|
+
# These do NOT provide access to any production or sensitive systems.
|
4
|
+
|
1
5
|
#DOCKER_IAM_KEY=dummy #Must Define in env
|
2
6
|
#You can generate any JWT_SECRET with python -c 'import secrets; print(secrets.token_hex(32))'
|
3
7
|
JWT_SECRET=11759cbc89dbec64956715e10a854eb38f8b7a1775bdf68142786170f5e8b5b2
|
@@ -53,27 +57,27 @@ EVENT_BROKER_TTL="3600"
|
|
53
57
|
REGISTRY_URL=
|
54
58
|
|
55
59
|
|
56
|
-
SERVER_TAG=
|
60
|
+
SERVER_TAG=01-08-2025
|
57
61
|
SERVER_REGISTRY=
|
58
62
|
|
59
|
-
WORKER_TAG=
|
63
|
+
WORKER_TAG=01-08-2025-v2
|
60
64
|
WORKER_REGISTRY=
|
61
65
|
|
62
|
-
AI_GATEWAY_TAG=
|
66
|
+
AI_GATEWAY_TAG=01-08-2025-v1
|
63
67
|
AI_GATEWAY_REGISTRY=
|
64
68
|
|
65
|
-
AGENT_GATEWAY_TAG=
|
69
|
+
AGENT_GATEWAY_TAG=29-07-2025
|
66
70
|
AGENT_GATEWAY_REGISTRY=
|
67
71
|
|
68
72
|
DB_REGISTRY=
|
69
73
|
# If you build multi-arch images, set all three of these to the same value; we have a PR against main
|
70
74
|
# to remove this separation, but we can merge it later
|
71
|
-
DBTAG=
|
72
|
-
AMDDBTAG=
|
73
|
-
ARM64DBTAG=
|
75
|
+
DBTAG=29-07-2025-9f3661b
|
76
|
+
AMDDBTAG=29-07-2025-9f3661b
|
77
|
+
ARM64DBTAG=29-07-2025-9f3661b
|
74
78
|
|
75
79
|
UI_REGISTRY=
|
76
|
-
UITAG=
|
80
|
+
UITAG=31-07-2025
|
77
81
|
|
78
82
|
CM_REGISTRY=
|
79
83
|
CM_TAG=24-07-2025
|
@@ -84,33 +88,33 @@ TRM_REGISTRY=
|
|
84
88
|
TR_TAG=23-07-2025-3c60549f0bac275de3e5736265a3fd49cdd3a203
|
85
89
|
TR_REGISTRY=
|
86
90
|
|
87
|
-
BUILDER_TAG=
|
91
|
+
BUILDER_TAG=31-07-2025-d7145cb
|
88
92
|
BUILDER_REGISTRY=
|
89
93
|
|
90
|
-
FLOW_RUNTIME_TAG=
|
94
|
+
FLOW_RUNTIME_TAG=01-08-2025
|
91
95
|
FLOW_RUMTIME_REGISTRY=
|
92
96
|
|
93
97
|
|
94
|
-
AGENT_ANALYTICS_TAG=
|
98
|
+
AGENT_ANALYTICS_TAG=05-08-2025
|
95
99
|
AGENT_ANALYTICS_REGISTRY=
|
96
100
|
|
97
|
-
JAEGER_PROXY_TAG=
|
101
|
+
JAEGER_PROXY_TAG=23-07-2025
|
98
102
|
JAEGER_PROXY_REGISTRY=
|
99
103
|
|
100
104
|
SOCKET_HANDLER_TAG=29-05-2025
|
101
105
|
SOCKET_HANDLER_REGISTRY=
|
102
106
|
|
103
|
-
CPE_TAG=
|
107
|
+
CPE_TAG=06-08-2025-b0a20ad
|
104
108
|
CPE_REGISTRY=
|
105
109
|
|
106
110
|
# IBM Document Processing
|
107
111
|
WDU_TAG=2.5.0
|
108
112
|
WDU_REGISTRY=
|
109
113
|
|
110
|
-
DOCPROC_DPS_TAG=
|
111
|
-
DOCPROC_LLMSERVICE_TAG=
|
112
|
-
DOCPROC_CACHE_TAG=
|
113
|
-
DOCPROC_DPI_TAG=
|
114
|
+
DOCPROC_DPS_TAG=20250721-164412-250-503756a
|
115
|
+
DOCPROC_LLMSERVICE_TAG=20250725-100249-111-51d3e51
|
116
|
+
DOCPROC_CACHE_TAG=20250723-100852-70-9edc1ab
|
117
|
+
DOCPROC_DPI_TAG=20250731-155328-257-06879e86
|
114
118
|
DOCPROC_REGISTRY=
|
115
119
|
|
116
120
|
# END -- IMAGE REGISTRIES AND TAGS
|
@@ -178,6 +182,7 @@ CALLBACK_HOST_URL=
|
|
178
182
|
|
179
183
|
AGENTOPS_API_KEY_AUTH_ENABLED=true
|
180
184
|
AGENTOPS_API_KEY=qwertyuiop
|
185
|
+
FORCE_SINGLE_TENANT=true
|
181
186
|
|
182
187
|
RUNTIME_MANAGER_API_KEY=example
|
183
188
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
from .constants import START, END, RESERVED
|
2
2
|
|
3
|
-
from ..types import FlowContext, TaskData, TaskEventType,
|
3
|
+
from ..types import FlowContext, TaskData, TaskEventType, DocProcInput, DecisionsCondition, DecisionsRule
|
4
4
|
from ..node import UserNode, AgentNode, StartNode, EndNode, PromptNode, ToolNode, DecisionsNode
|
5
5
|
|
6
6
|
from .flow import Flow, CompiledFlow, FlowRun, FlowEvent, FlowEventType, FlowFactory, MatchPolicy, WaitPolicy, ForeachPolicy, Branch, Foreach, Loop
|
@@ -16,7 +16,7 @@ __all__ = [
|
|
16
16
|
"FlowContext",
|
17
17
|
"TaskData",
|
18
18
|
"TaskEventType",
|
19
|
-
"
|
19
|
+
"DocProcInput",
|
20
20
|
|
21
21
|
"DocProcNode",
|
22
22
|
"UserNode",
|
@@ -8,7 +8,7 @@ from datetime import datetime
|
|
8
8
|
from enum import Enum
|
9
9
|
import inspect
|
10
10
|
from typing import (
|
11
|
-
Any, AsyncIterator, Callable, cast, List, Sequence, Union, Tuple
|
11
|
+
Any, AsyncIterator, Callable, Optional, cast, List, Sequence, Union, Tuple
|
12
12
|
)
|
13
13
|
import json
|
14
14
|
import logging
|
@@ -18,7 +18,7 @@ import pytz
|
|
18
18
|
import os
|
19
19
|
|
20
20
|
from typing_extensions import Self
|
21
|
-
from pydantic import BaseModel, Field, SerializeAsAny
|
21
|
+
from pydantic import BaseModel, Field, SerializeAsAny, create_model, TypeAdapter
|
22
22
|
import yaml
|
23
23
|
from ibm_watsonx_orchestrate.agent_builder.tools.python_tool import PythonTool
|
24
24
|
from ibm_watsonx_orchestrate.client.tools.tool_client import ToolClient
|
@@ -27,11 +27,11 @@ from ibm_watsonx_orchestrate.client.utils import instantiate_client
|
|
27
27
|
from ..types import (
|
28
28
|
EndNodeSpec, Expression, ForeachPolicy, ForeachSpec, LoopSpec, BranchNodeSpec, MatchPolicy, PromptLLMParameters, PromptNodeSpec,
|
29
29
|
StartNodeSpec, ToolSpec, JsonSchemaObject, ToolRequestBody, ToolResponseBody, UserFieldKind, UserFieldOption, UserFlowSpec, UserNodeSpec, WaitPolicy,
|
30
|
-
DocProcSpec, TextExtractionResponse,
|
30
|
+
DocProcSpec, TextExtractionResponse, DocProcInput, DecisionsNodeSpec, DecisionsRule, DocExtSpec, File
|
31
31
|
)
|
32
32
|
from .constants import CURRENT_USER, START, END, ANY_USER
|
33
33
|
from ..node import (
|
34
|
-
EndNode, Node, PromptNode, StartNode, UserNode, AgentNode, DataMap, ToolNode, DocProcNode, DecisionsNode
|
34
|
+
EndNode, Node, PromptNode, StartNode, UserNode, AgentNode, DataMap, ToolNode, DocProcNode, DecisionsNode, DocExtNode
|
35
35
|
)
|
36
36
|
from ..types import (
|
37
37
|
AgentNodeSpec, extract_node_spec, FlowContext, FlowEventType, FlowEvent, FlowSpec,
|
@@ -190,7 +190,7 @@ class Flow(Node):
|
|
190
190
|
|
191
191
|
def _add_schema_ref(self, schema: JsonSchemaObject, title: str = None) -> SchemaRef:
|
192
192
|
'''Create a schema reference'''
|
193
|
-
if schema and (schema.type == "object" or schema.type == "array"):
|
193
|
+
if schema and (schema.type == "object" or schema.type == "array" or schema.type == "string"):
|
194
194
|
new_schema = self._add_schema(schema, title)
|
195
195
|
return SchemaRef(ref=f"#/schemas/{new_schema.title}")
|
196
196
|
raise AssertionError(f"schema is not a complex object: {schema}")
|
@@ -199,7 +199,7 @@ class Flow(Node):
|
|
199
199
|
self._refactor_spec_to_schemaref(node.spec)
|
200
200
|
|
201
201
|
def _refactor_spec_to_schemaref(self, spec: NodeSpec):
|
202
|
-
if spec.input_schema:
|
202
|
+
if spec.input_schema and (spec.input_schema.type == "object" or spec.input_schema.type == "array") :
|
203
203
|
if isinstance(spec.input_schema, ToolRequestBody):
|
204
204
|
spec.input_schema = self._add_schema_ref(JsonSchemaObject(type = spec.input_schema.type,
|
205
205
|
properties= spec.input_schema.properties,
|
@@ -433,6 +433,50 @@ class Flow(Node):
|
|
433
433
|
node = self._add_node(node)
|
434
434
|
return cast(PromptNode, node)
|
435
435
|
|
436
|
+
def docext(self,
|
437
|
+
name: str,
|
438
|
+
llm : str = "meta-llama/llama-3-2-11b-vision-instruct",
|
439
|
+
version: str = "TIP",
|
440
|
+
display_name: str| None = None,
|
441
|
+
input_entities: type[BaseModel]| None = None,
|
442
|
+
description: str | None = None,
|
443
|
+
input_map: DataMap = None) -> tuple[DocExtNode, type[BaseModel]]:
|
444
|
+
|
445
|
+
if name is None :
|
446
|
+
raise ValueError("name must be provided.")
|
447
|
+
|
448
|
+
doc_ext_config = DocExtNode.generate_config(llm=llm, input_entites=input_entities)
|
449
|
+
|
450
|
+
DocExtFieldValue = DocExtNode.generate_docext_field_value_model(input_entities=input_entities)
|
451
|
+
|
452
|
+
input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = File)
|
453
|
+
output_schema_obj = _get_json_schema_obj("output", DocExtFieldValue)
|
454
|
+
|
455
|
+
if "$defs" in output_schema_obj.model_extra:
|
456
|
+
output_schema_obj.model_extra.pop("$defs")
|
457
|
+
|
458
|
+
# Create the docext spec
|
459
|
+
task_spec = DocExtSpec(
|
460
|
+
name=name,
|
461
|
+
display_name=display_name if display_name is not None else name,
|
462
|
+
description=description,
|
463
|
+
input_schema=_get_tool_request_body(input_schema_obj),
|
464
|
+
output_schema=_get_tool_response_body(output_schema_obj),
|
465
|
+
output_schema_object = output_schema_obj,
|
466
|
+
config=doc_ext_config,
|
467
|
+
version=version
|
468
|
+
)
|
469
|
+
node = DocExtNode(spec=task_spec)
|
470
|
+
|
471
|
+
# setup input map
|
472
|
+
if input_map:
|
473
|
+
node.input_map = self._get_data_map(input_map)
|
474
|
+
|
475
|
+
# add the node to the list of nodes
|
476
|
+
|
477
|
+
node = self._add_node(node)
|
478
|
+
return cast(DocExtNode, node), DocExtFieldValue
|
479
|
+
|
436
480
|
def decisions(self,
|
437
481
|
name: str,
|
438
482
|
display_name: str|None=None,
|
@@ -486,14 +530,12 @@ class Flow(Node):
|
|
486
530
|
if name is None :
|
487
531
|
raise ValueError("name must be provided.")
|
488
532
|
|
489
|
-
if task is None:
|
490
|
-
raise ValueError("task must be provided.")
|
491
533
|
|
492
534
|
output_schema_dict = {
|
493
535
|
"text_extraction" : TextExtractionResponse
|
494
536
|
}
|
495
537
|
# create input spec
|
496
|
-
input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def =
|
538
|
+
input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = DocProcInput)
|
497
539
|
output_schema_obj = _get_json_schema_obj("output", output_schema_dict[task])
|
498
540
|
if "$defs" in output_schema_obj.model_extra:
|
499
541
|
output_schema_obj.model_extra.pop("$defs")
|
@@ -1018,8 +1060,8 @@ class FlowFactory(BaseModel):
|
|
1018
1060
|
raise ValueError("Only functions with @flow_spec can be used to create a Flow specification.")
|
1019
1061
|
return Flow(spec = flow_spec)
|
1020
1062
|
|
1021
|
-
# create input spec
|
1022
1063
|
input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = input_schema)
|
1064
|
+
# create input spec
|
1023
1065
|
output_schema_obj = _get_json_schema_obj("output", output_schema)
|
1024
1066
|
if initiators is None:
|
1025
1067
|
initiators = []
|
@@ -1,11 +1,14 @@
|
|
1
1
|
import json
|
2
|
-
from typing import Any, cast
|
2
|
+
from typing import Any, cast, Type
|
3
3
|
import uuid
|
4
4
|
|
5
5
|
import yaml
|
6
|
-
from pydantic import BaseModel, Field, SerializeAsAny
|
6
|
+
from pydantic import BaseModel, Field, SerializeAsAny, create_model
|
7
|
+
from enum import Enum
|
8
|
+
|
9
|
+
from .types import EndNodeSpec, NodeSpec, AgentNodeSpec, PromptNodeSpec, StartNodeSpec, ToolNodeSpec, UserFieldKind, UserFieldOption, UserNodeSpec, DocProcSpec, \
|
10
|
+
DocExtSpec, DocExtConfig, LanguageCode, DecisionsNodeSpec
|
7
11
|
|
8
|
-
from .types import EndNodeSpec, NodeSpec, AgentNodeSpec, PromptNodeSpec, StartNodeSpec, ToolNodeSpec, UserFieldKind, UserFieldOption, UserNodeSpec, DocProcSpec, DecisionsNodeSpec
|
9
12
|
from .data_map import DataMap
|
10
13
|
|
11
14
|
class Node(BaseModel):
|
@@ -116,6 +119,34 @@ class DocProcNode(Node):
|
|
116
119
|
|
117
120
|
def get_spec(self) -> DocProcSpec:
|
118
121
|
return cast(DocProcSpec, self.spec)
|
122
|
+
|
123
|
+
class DocExtNode(Node):
|
124
|
+
def __repr__(self):
|
125
|
+
return f"DocExtNode(name='{self.spec.name}', description='{self.spec.description}')"
|
126
|
+
|
127
|
+
def get_spec(self) -> DocExtSpec:
|
128
|
+
return cast(DocExtSpec, self.spec)
|
129
|
+
|
130
|
+
@staticmethod
|
131
|
+
def generate_config(llm: str, input_entites: type[BaseModel]) -> DocExtConfig:
|
132
|
+
entities = input_entites.__dict__.values()
|
133
|
+
return DocExtConfig(llm=llm, entities=entities)
|
134
|
+
|
135
|
+
@staticmethod
|
136
|
+
def generate_docext_field_value_model(input_entities: type[BaseModel]) -> type[BaseModel]:
|
137
|
+
create_field_value_description = lambda field_name: "Extracted value for " + field_name
|
138
|
+
|
139
|
+
DocExtFieldValue = create_model(
|
140
|
+
"DocExtFieldValue",
|
141
|
+
**{
|
142
|
+
name: (str, Field(
|
143
|
+
title=value['name'],
|
144
|
+
description=create_field_value_description(value['name']),
|
145
|
+
)
|
146
|
+
)
|
147
|
+
for name, value in input_entities.model_dump().items()})
|
148
|
+
return DocExtFieldValue
|
149
|
+
|
119
150
|
class DecisionsNode(Node):
|
120
151
|
def __repr__(self):
|
121
152
|
return f"DecisionsNode(name='{self.spec.name}', description='{self.spec.description}')"
|
@@ -5,11 +5,14 @@ import numbers
|
|
5
5
|
import inspect
|
6
6
|
import logging
|
7
7
|
from typing import (
|
8
|
-
Any, Callable, Self, cast, Literal, List, NamedTuple, Optional, Sequence, Union
|
8
|
+
Annotated, Any, Callable, Self, cast, Literal, List, NamedTuple, Optional, Sequence, Union, NewType
|
9
9
|
)
|
10
|
+
from typing_extensions import Doc
|
10
11
|
|
11
12
|
import docstring_parser
|
12
|
-
from pydantic import BaseModel, Field
|
13
|
+
from pydantic import BaseModel, Field, GetCoreSchemaHandler, GetJsonSchemaHandler, RootModel
|
14
|
+
from pydantic_core import core_schema
|
15
|
+
from pydantic.json_schema import JsonSchemaValue
|
13
16
|
|
14
17
|
from langchain_core.tools.base import create_schema_from_function
|
15
18
|
from langchain_core.utils.json_schema import dereference_refs
|
@@ -86,7 +89,11 @@ def _to_json_from_input_schema(schema: Union[ToolRequestBody, SchemaRef]) -> dic
|
|
86
89
|
model_spec["properties"] = {}
|
87
90
|
for prop_name, prop_schema in request_body.properties.items():
|
88
91
|
model_spec["properties"][prop_name] = _to_json_from_json_schema(prop_schema)
|
89
|
-
model_spec["required"] = request_body.required
|
92
|
+
model_spec["required"] = request_body.required if request_body.required else []
|
93
|
+
if schema.model_extra:
|
94
|
+
for k, v in schema.model_extra.items():
|
95
|
+
model_spec[k] = v
|
96
|
+
|
90
97
|
elif isinstance(schema, SchemaRef):
|
91
98
|
model_spec["$ref"] = schema.ref
|
92
99
|
|
@@ -163,14 +170,99 @@ class NodeSpec(BaseModel):
|
|
163
170
|
|
164
171
|
return model_spec
|
165
172
|
|
173
|
+
class DocExtConfigEntity(BaseModel):
|
174
|
+
name: str = Field(description="Entity name")
|
175
|
+
type: Literal["string", "date", "number"] = Field(default="string", description="The type of the entity values")
|
176
|
+
description: str = Field(title="Description", description="Description of the entity", default="")
|
177
|
+
field_name: str = Field(title="Field Name", description="The normalized name of the entity", default="")
|
178
|
+
multiple_mentions: bool = Field(title="Multiple mentions",description="When true, we can produce multiple mentions of this entity", default=False)
|
179
|
+
example_value: str = Field(description="Value of example", default="")
|
180
|
+
examples: list[str] = Field(title="Examples", description="Examples that help the LLM understand the expected entity mentions", default=[])
|
181
|
+
|
182
|
+
class DocExtConfig(BaseModel):
|
183
|
+
domain: str = Field(description="Domiain of the document", default="other")
|
184
|
+
type: str = Field(description="Document type", default="agreement")
|
185
|
+
llm: str = Field(description="The LLM used for the document extraction", default="meta-llama/llama-3-2-11b-vision-instruct")
|
186
|
+
entities: list[DocExtConfigEntity] = Field(default=[])
|
187
|
+
|
188
|
+
class LanguageCode(StrEnum):
|
189
|
+
en = auto()
|
190
|
+
fr = auto()
|
191
|
+
|
192
|
+
class DocProcCommonNodeSpec(NodeSpec):
|
193
|
+
enable_hw: bool | None = Field(description="Boolean value indicating if hand-written feature is enabled.", title="Enable handwritten", default=False)
|
194
|
+
|
195
|
+
class DocExtSpec(DocProcCommonNodeSpec):
|
196
|
+
version : str = Field(description="A version of the spec")
|
197
|
+
config : DocExtConfig
|
198
|
+
|
199
|
+
def __init__(self, **data):
|
200
|
+
super().__init__(**data)
|
201
|
+
self.kind = "docext"
|
202
|
+
|
203
|
+
def to_json(self) -> dict[str, Any]:
|
204
|
+
model_spec = super().to_json()
|
205
|
+
model_spec["version"] = self.version
|
206
|
+
model_spec["config"] = self.config.model_dump()
|
207
|
+
return model_spec
|
208
|
+
|
209
|
+
class DocProcField(BaseModel):
|
210
|
+
description: str = Field(description="A description of the field to extract from the document.")
|
211
|
+
example: str = Field(description="An example of the field to extract from the document.", default='')
|
212
|
+
default: Optional[str] = Field(description="A default value for the field to extract from the document.", default='')
|
213
|
+
|
214
|
+
class DocProcTable(BaseModel):
|
215
|
+
type: Literal["array"]
|
216
|
+
description: str = Field(description="A description of the table to extract from the document.")
|
217
|
+
columns: dict[str,DocProcField] = Field(description="The columns to extract from the table. These are the keys in the table extraction result.")
|
218
|
+
|
219
|
+
class DocProcKVPSchema(BaseModel):
|
220
|
+
document_type: str = Field(description="A label for the kind of documents we want to extract")
|
221
|
+
document_description: str = Field(description="A description of the kind of documents we want to extractI. This is used to select which schema to use for extraction.")
|
222
|
+
fields: dict[str, DocProcField | DocProcTable] = Field(description="The fields to extract from the document. These are the keys in the KVP extraction result.")
|
223
|
+
|
224
|
+
class DocProcBoundingBox(BaseModel):
|
225
|
+
x: float = Field(description="The x coordinate of the bounding box.")
|
226
|
+
y: float = Field(description="The y coordinate of the bounding box.")
|
227
|
+
width: float = Field(description="The width of the bounding box.")
|
228
|
+
height: float = Field(description="The height of the bounding box.")
|
229
|
+
page_number: int = Field(description="The page number of the bounding box in the document.")
|
230
|
+
|
231
|
+
class KVPBaseEntry(BaseModel):
|
232
|
+
id: str = Field(description="A unique identifier.")
|
233
|
+
raw_text: str = Field(description="The raw text.")
|
234
|
+
normalized_text: Optional[str] = Field(description="The normalized text.", default=None)
|
235
|
+
confidence_score: Optional[float] = Field(description="The confidence score.", default=None)
|
236
|
+
bbox: Optional[DocProcBoundingBox] = Field(description="The bounding box in the document.", default=None)
|
237
|
+
|
238
|
+
class DocProcKey(KVPBaseEntry):
|
239
|
+
semantic_label: str = Field(description="A semantic label for the key.")
|
240
|
+
|
241
|
+
class DocProcValue(KVPBaseEntry):
|
242
|
+
pass
|
243
|
+
|
244
|
+
class DocProcKVP(BaseModel):
|
245
|
+
id: str = Field(description="A unique identifier for the key-value pair.")
|
246
|
+
type: Literal["key_value","only_value"]
|
247
|
+
key: DocProcKey = Field(description="The key of the key-value pair.")
|
248
|
+
value: DocProcValue = Field(description="The value of the key-value pair.")
|
249
|
+
group_id: Optional[str] = Field(default=None, description="The group id of the key-value pair. This is used to group key-value pairs together.")
|
250
|
+
table_id: Optional[str] = Field(default=None, description="The table id of the key-value pair. This is used to group key-value pairs together in a table.")
|
251
|
+
table_name: Optional[str] = Field(default=None, description="The name of the table the key-value pair belongs to. This is used to group key-value pairs together in a table.")
|
252
|
+
table_row_index: Optional[int] = Field(default=None, description="The index of the row in the table the key-value pair belongs to. This is used to group key-value pairs together in a table.")
|
253
|
+
|
166
254
|
class DocProcTask(StrEnum):
|
167
255
|
'''
|
168
256
|
Possible names for the Document processing task parameter
|
169
257
|
'''
|
170
258
|
text_extraction = auto()
|
171
259
|
|
172
|
-
class DocProcSpec(
|
260
|
+
class DocProcSpec(DocProcCommonNodeSpec):
|
173
261
|
task: DocProcTask = Field(description='The document processing operation name', default=DocProcTask.text_extraction)
|
262
|
+
kvp_schema: List[DocProcKVPSchema] | None = Field(
|
263
|
+
title='KVP schemas',
|
264
|
+
description="Optional list of key-value pair schemas to use for extraction.",
|
265
|
+
default=None)
|
174
266
|
|
175
267
|
def __init__(self, **data):
|
176
268
|
super().__init__(**data)
|
@@ -673,6 +765,7 @@ class TaskEventType(Enum):
|
|
673
765
|
ON_TASK_END = "task:on_task_end"
|
674
766
|
ON_TASK_STREAM = "task:on_task_stream"
|
675
767
|
ON_TASK_ERROR = "task:on_task_error"
|
768
|
+
ON_TASK_RESUME= "task:on_task_resume"
|
676
769
|
|
677
770
|
class FlowData(BaseModel):
|
678
771
|
'''This class represents the data that is passed between tasks in a flow.'''
|
@@ -707,7 +800,7 @@ class FlowEventType(Enum):
|
|
707
800
|
ON_FLOW_START = "flow:on_flow_start"
|
708
801
|
ON_FLOW_END = "flow:on_flow_end"
|
709
802
|
ON_FLOW_ERROR = "flow:on_flow_error"
|
710
|
-
|
803
|
+
ON_FLOW_RESUME = "flow:on_flow_resume"
|
711
804
|
|
712
805
|
@dataclass
|
713
806
|
class FlowEvent:
|
@@ -743,41 +836,67 @@ class LanguageCode(StrEnum):
|
|
743
836
|
fr = auto()
|
744
837
|
en_hw = auto()
|
745
838
|
|
746
|
-
|
839
|
+
|
840
|
+
class File(str):
|
841
|
+
@classmethod
|
842
|
+
def __get_pydantic_core_schema__(
|
843
|
+
cls, source_type: Any, handler: GetCoreSchemaHandler
|
844
|
+
) -> core_schema.CoreSchema:
|
845
|
+
return core_schema.no_info_wrap_validator_function(
|
846
|
+
cls.validate,
|
847
|
+
core_schema.str_schema(),
|
848
|
+
serialization=core_schema.plain_serializer_function_ser_schema(lambda v: str(v))
|
849
|
+
)
|
850
|
+
|
851
|
+
@classmethod
|
852
|
+
def validate(cls, value: Any) -> "File":
|
853
|
+
if not isinstance(value, str):
|
854
|
+
raise TypeError("File must be a document reference (string)")
|
855
|
+
return cls(value)
|
856
|
+
|
857
|
+
@classmethod
|
858
|
+
def __get_pydantic_json_schema__(
|
859
|
+
cls, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
|
860
|
+
) -> JsonSchemaValue:
|
861
|
+
return {
|
862
|
+
"type": "string",
|
863
|
+
"title": "Document reference",
|
864
|
+
"format": "binary",
|
865
|
+
"description": "Either an ID or a URL identifying the document to be used.",
|
866
|
+
"wrap_data": False,
|
867
|
+
"required": []
|
868
|
+
}
|
869
|
+
|
870
|
+
class DocExtInput(BaseModel):
|
871
|
+
document_ref: bytes | File = Field(description="Either an ID or a URL identifying the document to be used.", title='Document reference', default=None, json_schema_extra={"format": "binary"})
|
872
|
+
|
873
|
+
|
874
|
+
class DocProcInput(BaseModel):
|
747
875
|
'''
|
748
876
|
This class represents the input of a Document processing task.
|
749
877
|
|
750
878
|
Attributes:
|
751
879
|
document_ref (bytes|str): This is either a URL to the location of the document bytes or an ID that we use to resolve the location of the document
|
752
880
|
language (LanguageCode): Optional language code used when processing the input document
|
881
|
+
kvp_schemas (List[DocProcKVPSchema]): Optional list of key-value pair schemas to use for extraction. If not provided or None, no KVPs will be extracted. If an empty list is provided, we will use the internal schemas to extract KVPS.
|
753
882
|
'''
|
754
883
|
# This is declared as bytes but the runtime will understand if a URL is sent in as input.
|
755
884
|
# We need to use bytes here for Chat-with-doc to recognize the input as a File.
|
756
|
-
document_ref: bytes |
|
757
|
-
|
758
|
-
title='
|
759
|
-
|
760
|
-
|
761
|
-
language: Optional[LanguageCode] = Field(
|
762
|
-
description='Optional language code of the document, defaults to "en"',
|
763
|
-
title='Document language code',
|
764
|
-
default=LanguageCode.en)
|
765
|
-
|
766
|
-
class TextExtraction(BaseModel):
|
767
|
-
'''
|
768
|
-
This class represents the output generated by a "text_extraction" document processing (docproc) operation.
|
769
|
-
Attributes:
|
770
|
-
text (str): the text extracted from the input document.
|
771
|
-
'''
|
772
|
-
text: str = Field(description='The text extracted from the input document', title='Text extraction')
|
885
|
+
document_ref: bytes | File = Field(description="Either an ID or a URL identifying the document to be used.", title='Document reference', default=None, json_schema_extra={"format": "binary"})
|
886
|
+
kvp_schemas: Optional[List[DocProcKVPSchema]] = Field(
|
887
|
+
title='KVP schemas',
|
888
|
+
description="Optional list of key-value pair schemas to use for extraction.",
|
889
|
+
default=None)
|
773
890
|
|
774
891
|
class TextExtractionResponse(BaseModel):
|
775
892
|
'''
|
776
893
|
The text extraction operation response.
|
777
894
|
Attributes:
|
778
|
-
|
895
|
+
text (str): the text extracted from the input document.
|
896
|
+
kvps (Optional[list[DocProcKVP]]): A list of key-value pairs extracted from the document. If no KVPs were extracted, this will be None.
|
779
897
|
'''
|
780
|
-
|
898
|
+
text: str = Field(description='The text extracted from the input document', title='text')
|
899
|
+
kvps: Optional[list[DocProcKVP]] = Field(description="A list of key-value pairs extracted from the document.", default=None)
|
781
900
|
|
782
901
|
|
783
902
|
class DecisionsCondition(BaseModel):
|
@@ -7,7 +7,6 @@ from pydantic import BaseModel, TypeAdapter
|
|
7
7
|
|
8
8
|
from langchain_core.utils.json_schema import dereference_refs
|
9
9
|
import typer
|
10
|
-
import yaml
|
11
10
|
|
12
11
|
from ibm_watsonx_orchestrate.agent_builder.tools.base_tool import BaseTool
|
13
12
|
from ibm_watsonx_orchestrate.agent_builder.tools.flow_tool import create_flow_json_tool
|
@@ -90,9 +89,13 @@ def _get_tool_request_body(schema_obj: JsonSchemaObject) -> ToolRequestBody:
|
|
90
89
|
request_obj = ToolRequestBody(type='object', properties=schema_obj.properties, required=schema_obj.required)
|
91
90
|
if schema_obj.model_extra:
|
92
91
|
request_obj.__pydantic_extra__ = schema_obj.model_extra
|
93
|
-
else:
|
94
|
-
|
95
|
-
|
92
|
+
else:
|
93
|
+
if schema_obj.wrap_data:
|
94
|
+
# we need to wrap a simple type with an object
|
95
|
+
request_obj = ToolRequestBody(type='object', properties={}, required=[])
|
96
|
+
request_obj.properties["data"] = schema_obj
|
97
|
+
else:
|
98
|
+
request_obj = ToolRequestBody(type=schema_obj.type, title=schema_obj.title, description=schema_obj.description, format=schema_obj.format)
|
96
99
|
if schema_obj.model_extra:
|
97
100
|
request_obj.__pydantic_extra__ = schema_obj.model_extra
|
98
101
|
|