ibm-watsonx-orchestrate 1.8.1__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. ibm_watsonx_orchestrate/__init__.py +1 -2
  2. ibm_watsonx_orchestrate/agent_builder/knowledge_bases/types.py +2 -2
  3. ibm_watsonx_orchestrate/agent_builder/models/types.py +5 -0
  4. ibm_watsonx_orchestrate/agent_builder/tools/python_tool.py +18 -6
  5. ibm_watsonx_orchestrate/agent_builder/tools/types.py +5 -3
  6. ibm_watsonx_orchestrate/cli/commands/agents/agents_controller.py +15 -3
  7. ibm_watsonx_orchestrate/cli/commands/connections/connections_controller.py +6 -3
  8. ibm_watsonx_orchestrate/cli/commands/copilot/copilot_controller.py +103 -23
  9. ibm_watsonx_orchestrate/cli/commands/models/model_provider_mapper.py +17 -13
  10. ibm_watsonx_orchestrate/cli/commands/server/server_command.py +146 -36
  11. ibm_watsonx_orchestrate/cli/commands/toolkit/toolkit_command.py +4 -2
  12. ibm_watsonx_orchestrate/cli/commands/toolkit/toolkit_controller.py +9 -1
  13. ibm_watsonx_orchestrate/cli/commands/tools/tools_controller.py +1 -1
  14. ibm_watsonx_orchestrate/client/connections/connections_client.py +14 -2
  15. ibm_watsonx_orchestrate/client/copilot/cpe/copilot_cpe_client.py +5 -3
  16. ibm_watsonx_orchestrate/docker/compose-lite.yml +124 -9
  17. ibm_watsonx_orchestrate/docker/default.env +22 -17
  18. ibm_watsonx_orchestrate/flow_builder/flows/__init__.py +2 -2
  19. ibm_watsonx_orchestrate/flow_builder/flows/constants.py +2 -0
  20. ibm_watsonx_orchestrate/flow_builder/flows/flow.py +52 -10
  21. ibm_watsonx_orchestrate/flow_builder/node.py +34 -3
  22. ibm_watsonx_orchestrate/flow_builder/types.py +144 -25
  23. ibm_watsonx_orchestrate/flow_builder/utils.py +7 -4
  24. {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/METADATA +1 -1
  25. {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/RECORD +28 -28
  26. {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/WHEEL +0 -0
  27. {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/entry_points.txt +0 -0
  28. {ibm_watsonx_orchestrate-1.8.1.dist-info → ibm_watsonx_orchestrate-1.9.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,3 +1,7 @@
1
+ # These credentials are for local development only.
2
+ # They are default values and can be overridden by the user.
3
+ # These do NOT provide access to any production or sensitive systems.
4
+
1
5
  #DOCKER_IAM_KEY=dummy #Must Define in env
2
6
  #You can generate any JWT_SECRET with python -c 'import secrets; print(secrets.token_hex(32))'
3
7
  JWT_SECRET=11759cbc89dbec64956715e10a854eb38f8b7a1775bdf68142786170f5e8b5b2
@@ -53,27 +57,27 @@ EVENT_BROKER_TTL="3600"
53
57
  REGISTRY_URL=
54
58
 
55
59
 
56
- SERVER_TAG=22-07-2025
60
+ SERVER_TAG=01-08-2025
57
61
  SERVER_REGISTRY=
58
62
 
59
- WORKER_TAG=22-07-2025
63
+ WORKER_TAG=01-08-2025-v2
60
64
  WORKER_REGISTRY=
61
65
 
62
- AI_GATEWAY_TAG=21-07-2025
66
+ AI_GATEWAY_TAG=01-08-2025-v1
63
67
  AI_GATEWAY_REGISTRY=
64
68
 
65
- AGENT_GATEWAY_TAG=07-07-2025
69
+ AGENT_GATEWAY_TAG=29-07-2025
66
70
  AGENT_GATEWAY_REGISTRY=
67
71
 
68
72
  DB_REGISTRY=
69
73
  # If you build multiarch set all three of these to the same, we have a pr against main
70
74
  # to not have this separation, but we can merge it later
71
- DBTAG=22-07-2025
72
- AMDDBTAG=24-06-2025-v1
73
- ARM64DBTAG=24-06-2025-v1
75
+ DBTAG=29-07-2025-9f3661b
76
+ AMDDBTAG=29-07-2025-9f3661b
77
+ ARM64DBTAG=29-07-2025-9f3661b
74
78
 
75
79
  UI_REGISTRY=
76
- UITAG=23-07-2025
80
+ UITAG=31-07-2025
77
81
 
78
82
  CM_REGISTRY=
79
83
  CM_TAG=24-07-2025
@@ -84,33 +88,33 @@ TRM_REGISTRY=
84
88
  TR_TAG=23-07-2025-3c60549f0bac275de3e5736265a3fd49cdd3a203
85
89
  TR_REGISTRY=
86
90
 
87
- BUILDER_TAG=22-07-2025-v1
91
+ BUILDER_TAG=31-07-2025-d7145cb
88
92
  BUILDER_REGISTRY=
89
93
 
90
- FLOW_RUNTIME_TAG=15-07-2025
94
+ FLOW_RUNTIME_TAG=01-08-2025
91
95
  FLOW_RUMTIME_REGISTRY=
92
96
 
93
97
 
94
- AGENT_ANALYTICS_TAG=02-07-2025-v1
98
+ AGENT_ANALYTICS_TAG=05-08-2025
95
99
  AGENT_ANALYTICS_REGISTRY=
96
100
 
97
- JAEGER_PROXY_TAG=01-07-2025
101
+ JAEGER_PROXY_TAG=23-07-2025
98
102
  JAEGER_PROXY_REGISTRY=
99
103
 
100
104
  SOCKET_HANDLER_TAG=29-05-2025
101
105
  SOCKET_HANDLER_REGISTRY=
102
106
 
103
- CPE_TAG=17-07-2025
107
+ CPE_TAG=06-08-2025-b0a20ad
104
108
  CPE_REGISTRY=
105
109
 
106
110
  # IBM Document Processing
107
111
  WDU_TAG=2.5.0
108
112
  WDU_REGISTRY=
109
113
 
110
- DOCPROC_DPS_TAG=20250610-183301-248-865fbc1
111
- DOCPROC_LLMSERVICE_TAG=20250604-192056-107-e1d4d66
112
- DOCPROC_CACHE_TAG=20250610-214940-68-f3258f4
113
- DOCPROC_DPI_TAG=20250702-000808-237-7b1e424d
114
+ DOCPROC_DPS_TAG=20250721-164412-250-503756a
115
+ DOCPROC_LLMSERVICE_TAG=20250725-100249-111-51d3e51
116
+ DOCPROC_CACHE_TAG=20250723-100852-70-9edc1ab
117
+ DOCPROC_DPI_TAG=20250731-155328-257-06879e86
114
118
  DOCPROC_REGISTRY=
115
119
 
116
120
  # END -- IMAGE REGISTRIES AND TAGS
@@ -178,6 +182,7 @@ CALLBACK_HOST_URL=
178
182
 
179
183
  AGENTOPS_API_KEY_AUTH_ENABLED=true
180
184
  AGENTOPS_API_KEY=qwertyuiop
185
+ FORCE_SINGLE_TENANT=true
181
186
 
182
187
  RUNTIME_MANAGER_API_KEY=example
183
188
 
@@ -1,6 +1,6 @@
1
1
  from .constants import START, END, RESERVED
2
2
 
3
- from ..types import FlowContext, TaskData, TaskEventType, File, DecisionsCondition, DecisionsRule
3
+ from ..types import FlowContext, TaskData, TaskEventType, DocProcInput, DecisionsCondition, DecisionsRule
4
4
  from ..node import UserNode, AgentNode, StartNode, EndNode, PromptNode, ToolNode, DecisionsNode
5
5
 
6
6
  from .flow import Flow, CompiledFlow, FlowRun, FlowEvent, FlowEventType, FlowFactory, MatchPolicy, WaitPolicy, ForeachPolicy, Branch, Foreach, Loop
@@ -16,7 +16,7 @@ __all__ = [
16
16
  "FlowContext",
17
17
  "TaskData",
18
18
  "TaskEventType",
19
- "File",
19
+ "DocProcInput",
20
20
 
21
21
  "DocProcNode",
22
22
  "UserNode",
@@ -17,3 +17,5 @@ RESERVED = {
17
17
  ANY_USER,
18
18
  CURRENT_USER
19
19
  }
20
+
21
+
@@ -8,7 +8,7 @@ from datetime import datetime
8
8
  from enum import Enum
9
9
  import inspect
10
10
  from typing import (
11
- Any, AsyncIterator, Callable, cast, List, Sequence, Union, Tuple
11
+ Any, AsyncIterator, Callable, Optional, cast, List, Sequence, Union, Tuple
12
12
  )
13
13
  import json
14
14
  import logging
@@ -18,7 +18,7 @@ import pytz
18
18
  import os
19
19
 
20
20
  from typing_extensions import Self
21
- from pydantic import BaseModel, Field, SerializeAsAny
21
+ from pydantic import BaseModel, Field, SerializeAsAny, create_model, TypeAdapter
22
22
  import yaml
23
23
  from ibm_watsonx_orchestrate.agent_builder.tools.python_tool import PythonTool
24
24
  from ibm_watsonx_orchestrate.client.tools.tool_client import ToolClient
@@ -27,11 +27,11 @@ from ibm_watsonx_orchestrate.client.utils import instantiate_client
27
27
  from ..types import (
28
28
  EndNodeSpec, Expression, ForeachPolicy, ForeachSpec, LoopSpec, BranchNodeSpec, MatchPolicy, PromptLLMParameters, PromptNodeSpec,
29
29
  StartNodeSpec, ToolSpec, JsonSchemaObject, ToolRequestBody, ToolResponseBody, UserFieldKind, UserFieldOption, UserFlowSpec, UserNodeSpec, WaitPolicy,
30
- DocProcSpec, TextExtractionResponse, File, DecisionsNodeSpec, DecisionsRule
30
+ DocProcSpec, TextExtractionResponse, DocProcInput, DecisionsNodeSpec, DecisionsRule, DocExtSpec, File
31
31
  )
32
32
  from .constants import CURRENT_USER, START, END, ANY_USER
33
33
  from ..node import (
34
- EndNode, Node, PromptNode, StartNode, UserNode, AgentNode, DataMap, ToolNode, DocProcNode, DecisionsNode
34
+ EndNode, Node, PromptNode, StartNode, UserNode, AgentNode, DataMap, ToolNode, DocProcNode, DecisionsNode, DocExtNode
35
35
  )
36
36
  from ..types import (
37
37
  AgentNodeSpec, extract_node_spec, FlowContext, FlowEventType, FlowEvent, FlowSpec,
@@ -190,7 +190,7 @@ class Flow(Node):
190
190
 
191
191
  def _add_schema_ref(self, schema: JsonSchemaObject, title: str = None) -> SchemaRef:
192
192
  '''Create a schema reference'''
193
- if schema and (schema.type == "object" or schema.type == "array"):
193
+ if schema and (schema.type == "object" or schema.type == "array" or schema.type == "string"):
194
194
  new_schema = self._add_schema(schema, title)
195
195
  return SchemaRef(ref=f"#/schemas/{new_schema.title}")
196
196
  raise AssertionError(f"schema is not a complex object: {schema}")
@@ -199,7 +199,7 @@ class Flow(Node):
199
199
  self._refactor_spec_to_schemaref(node.spec)
200
200
 
201
201
  def _refactor_spec_to_schemaref(self, spec: NodeSpec):
202
- if spec.input_schema:
202
+ if spec.input_schema and (spec.input_schema.type == "object" or spec.input_schema.type == "array") :
203
203
  if isinstance(spec.input_schema, ToolRequestBody):
204
204
  spec.input_schema = self._add_schema_ref(JsonSchemaObject(type = spec.input_schema.type,
205
205
  properties= spec.input_schema.properties,
@@ -433,6 +433,50 @@ class Flow(Node):
433
433
  node = self._add_node(node)
434
434
  return cast(PromptNode, node)
435
435
 
436
+ def docext(self,
437
+ name: str,
438
+ llm : str = "meta-llama/llama-3-2-11b-vision-instruct",
439
+ version: str = "TIP",
440
+ display_name: str| None = None,
441
+ input_entities: type[BaseModel]| None = None,
442
+ description: str | None = None,
443
+ input_map: DataMap = None) -> tuple[DocExtNode, type[BaseModel]]:
444
+
445
+ if name is None :
446
+ raise ValueError("name must be provided.")
447
+
448
+ doc_ext_config = DocExtNode.generate_config(llm=llm, input_entites=input_entities)
449
+
450
+ DocExtFieldValue = DocExtNode.generate_docext_field_value_model(input_entities=input_entities)
451
+
452
+ input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = File)
453
+ output_schema_obj = _get_json_schema_obj("output", DocExtFieldValue)
454
+
455
+ if "$defs" in output_schema_obj.model_extra:
456
+ output_schema_obj.model_extra.pop("$defs")
457
+
458
+ # Create the docext spec
459
+ task_spec = DocExtSpec(
460
+ name=name,
461
+ display_name=display_name if display_name is not None else name,
462
+ description=description,
463
+ input_schema=_get_tool_request_body(input_schema_obj),
464
+ output_schema=_get_tool_response_body(output_schema_obj),
465
+ output_schema_object = output_schema_obj,
466
+ config=doc_ext_config,
467
+ version=version
468
+ )
469
+ node = DocExtNode(spec=task_spec)
470
+
471
+ # setup input map
472
+ if input_map:
473
+ node.input_map = self._get_data_map(input_map)
474
+
475
+ # add the node to the list of node
476
+
477
+ node = self._add_node(node)
478
+ return cast(DocExtNode, node), DocExtFieldValue
479
+
436
480
  def decisions(self,
437
481
  name: str,
438
482
  display_name: str|None=None,
@@ -486,14 +530,12 @@ class Flow(Node):
486
530
  if name is None :
487
531
  raise ValueError("name must be provided.")
488
532
 
489
- if task is None:
490
- raise ValueError("task must be provided.")
491
533
 
492
534
  output_schema_dict = {
493
535
  "text_extraction" : TextExtractionResponse
494
536
  }
495
537
  # create input spec
496
- input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = File)
538
+ input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = DocProcInput)
497
539
  output_schema_obj = _get_json_schema_obj("output", output_schema_dict[task])
498
540
  if "$defs" in output_schema_obj.model_extra:
499
541
  output_schema_obj.model_extra.pop("$defs")
@@ -1018,8 +1060,8 @@ class FlowFactory(BaseModel):
1018
1060
  raise ValueError("Only functions with @flow_spec can be used to create a Flow specification.")
1019
1061
  return Flow(spec = flow_spec)
1020
1062
 
1021
- # create input spec
1022
1063
  input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = input_schema)
1064
+ # create input spec
1023
1065
  output_schema_obj = _get_json_schema_obj("output", output_schema)
1024
1066
  if initiators is None:
1025
1067
  initiators = []
@@ -1,11 +1,14 @@
1
1
  import json
2
- from typing import Any, cast
2
+ from typing import Any, cast, Type
3
3
  import uuid
4
4
 
5
5
  import yaml
6
- from pydantic import BaseModel, Field, SerializeAsAny
6
+ from pydantic import BaseModel, Field, SerializeAsAny, create_model
7
+ from enum import Enum
8
+
9
+ from .types import EndNodeSpec, NodeSpec, AgentNodeSpec, PromptNodeSpec, StartNodeSpec, ToolNodeSpec, UserFieldKind, UserFieldOption, UserNodeSpec, DocProcSpec, \
10
+ DocExtSpec, DocExtConfig, LanguageCode, DecisionsNodeSpec
7
11
 
8
- from .types import EndNodeSpec, NodeSpec, AgentNodeSpec, PromptNodeSpec, StartNodeSpec, ToolNodeSpec, UserFieldKind, UserFieldOption, UserNodeSpec, DocProcSpec, DecisionsNodeSpec
9
12
  from .data_map import DataMap
10
13
 
11
14
  class Node(BaseModel):
@@ -116,6 +119,34 @@ class DocProcNode(Node):
116
119
 
117
120
  def get_spec(self) -> DocProcSpec:
118
121
  return cast(DocProcSpec, self.spec)
122
+
123
+ class DocExtNode(Node):
124
+ def __repr__(self):
125
+ return f"DocExtNode(name='{self.spec.name}', description='{self.spec.description}')"
126
+
127
+ def get_spec(self) -> DocExtSpec:
128
+ return cast(DocExtSpec, self.spec)
129
+
130
+ @staticmethod
131
+ def generate_config(llm: str, input_entites: type[BaseModel]) -> DocExtConfig:
132
+ entities = input_entites.__dict__.values()
133
+ return DocExtConfig(llm=llm, entities=entities)
134
+
135
+ @staticmethod
136
+ def generate_docext_field_value_model(input_entities: type[BaseModel]) -> type[BaseModel]:
137
+ create_field_value_description = lambda field_name: "Extracted value for " + field_name
138
+
139
+ DocExtFieldValue = create_model(
140
+ "DocExtFieldValue",
141
+ **{
142
+ name: (str, Field(
143
+ title=value['name'],
144
+ description=create_field_value_description(value['name']),
145
+ )
146
+ )
147
+ for name, value in input_entities.model_dump().items()})
148
+ return DocExtFieldValue
149
+
119
150
  class DecisionsNode(Node):
120
151
  def __repr__(self):
121
152
  return f"DecisionsNode(name='{self.spec.name}', description='{self.spec.description}')"
@@ -5,11 +5,14 @@ import numbers
5
5
  import inspect
6
6
  import logging
7
7
  from typing import (
8
- Any, Callable, Self, cast, Literal, List, NamedTuple, Optional, Sequence, Union
8
+ Annotated, Any, Callable, Self, cast, Literal, List, NamedTuple, Optional, Sequence, Union, NewType
9
9
  )
10
+ from typing_extensions import Doc
10
11
 
11
12
  import docstring_parser
12
- from pydantic import BaseModel, Field
13
+ from pydantic import BaseModel, Field, GetCoreSchemaHandler, GetJsonSchemaHandler, RootModel
14
+ from pydantic_core import core_schema
15
+ from pydantic.json_schema import JsonSchemaValue
13
16
 
14
17
  from langchain_core.tools.base import create_schema_from_function
15
18
  from langchain_core.utils.json_schema import dereference_refs
@@ -86,7 +89,11 @@ def _to_json_from_input_schema(schema: Union[ToolRequestBody, SchemaRef]) -> dic
86
89
  model_spec["properties"] = {}
87
90
  for prop_name, prop_schema in request_body.properties.items():
88
91
  model_spec["properties"][prop_name] = _to_json_from_json_schema(prop_schema)
89
- model_spec["required"] = request_body.required
92
+ model_spec["required"] = request_body.required if request_body.required else []
93
+ if schema.model_extra:
94
+ for k, v in schema.model_extra.items():
95
+ model_spec[k] = v
96
+
90
97
  elif isinstance(schema, SchemaRef):
91
98
  model_spec["$ref"] = schema.ref
92
99
 
@@ -163,14 +170,99 @@ class NodeSpec(BaseModel):
163
170
 
164
171
  return model_spec
165
172
 
173
+ class DocExtConfigEntity(BaseModel):
174
+ name: str = Field(description="Entity name")
175
+ type: Literal["string", "date", "number"] = Field(default="string", description="The type of the entity values")
176
+ description: str = Field(title="Description", description="Description of the entity", default="")
177
+ field_name: str = Field(title="Field Name", description="The normalized name of the entity", default="")
178
+ multiple_mentions: bool = Field(title="Multiple mentions",description="When true, we can produce multiple mentions of this entity", default=False)
179
+ example_value: str = Field(description="Value of example", default="")
180
+ examples: list[str] = Field(title="Examples", description="Examples that help the LLM understand the expected entity mentions", default=[])
181
+
182
+ class DocExtConfig(BaseModel):
183
+ domain: str = Field(description="Domain of the document", default="other")
184
+ type: str = Field(description="Document type", default="agreement")
185
+ llm: str = Field(description="The LLM used for the document extraction", default="meta-llama/llama-3-2-11b-vision-instruct")
186
+ entities: list[DocExtConfigEntity] = Field(default=[])
187
+
188
+ class LanguageCode(StrEnum):
189
+ en = auto()
190
+ fr = auto()
191
+
192
+ class DocProcCommonNodeSpec(NodeSpec):
193
+ enable_hw: bool | None = Field(description="Boolean value indicating if hand-written feature is enabled.", title="Enable handwritten", default=False)
194
+
195
+ class DocExtSpec(DocProcCommonNodeSpec):
196
+ version : str = Field(description="A version of the spec")
197
+ config : DocExtConfig
198
+
199
+ def __init__(self, **data):
200
+ super().__init__(**data)
201
+ self.kind = "docext"
202
+
203
+ def to_json(self) -> dict[str, Any]:
204
+ model_spec = super().to_json()
205
+ model_spec["version"] = self.version
206
+ model_spec["config"] = self.config.model_dump()
207
+ return model_spec
208
+
209
+ class DocProcField(BaseModel):
210
+ description: str = Field(description="A description of the field to extract from the document.")
211
+ example: str = Field(description="An example of the field to extract from the document.", default='')
212
+ default: Optional[str] = Field(description="A default value for the field to extract from the document.", default='')
213
+
214
+ class DocProcTable(BaseModel):
215
+ type: Literal["array"]
216
+ description: str = Field(description="A description of the table to extract from the document.")
217
+ columns: dict[str,DocProcField] = Field(description="The columns to extract from the table. These are the keys in the table extraction result.")
218
+
219
+ class DocProcKVPSchema(BaseModel):
220
+ document_type: str = Field(description="A label for the kind of documents we want to extract")
221
+ document_description: str = Field(description="A description of the kind of documents we want to extract. This is used to select which schema to use for extraction.")
222
+ fields: dict[str, DocProcField | DocProcTable] = Field(description="The fields to extract from the document. These are the keys in the KVP extraction result.")
223
+
224
+ class DocProcBoundingBox(BaseModel):
225
+ x: float = Field(description="The x coordinate of the bounding box.")
226
+ y: float = Field(description="The y coordinate of the bounding box.")
227
+ width: float = Field(description="The width of the bounding box.")
228
+ height: float = Field(description="The height of the bounding box.")
229
+ page_number: int = Field(description="The page number of the bounding box in the document.")
230
+
231
+ class KVPBaseEntry(BaseModel):
232
+ id: str = Field(description="A unique identifier.")
233
+ raw_text: str = Field(description="The raw text.")
234
+ normalized_text: Optional[str] = Field(description="The normalized text.", default=None)
235
+ confidence_score: Optional[float] = Field(description="The confidence score.", default=None)
236
+ bbox: Optional[DocProcBoundingBox] = Field(description="The bounding box in the document.", default=None)
237
+
238
+ class DocProcKey(KVPBaseEntry):
239
+ semantic_label: str = Field(description="A semantic label for the key.")
240
+
241
+ class DocProcValue(KVPBaseEntry):
242
+ pass
243
+
244
+ class DocProcKVP(BaseModel):
245
+ id: str = Field(description="A unique identifier for the key-value pair.")
246
+ type: Literal["key_value","only_value"]
247
+ key: DocProcKey = Field(description="The key of the key-value pair.")
248
+ value: DocProcValue = Field(description="The value of the key-value pair.")
249
+ group_id: Optional[str] = Field(default=None, description="The group id of the key-value pair. This is used to group key-value pairs together.")
250
+ table_id: Optional[str] = Field(default=None, description="The table id of the key-value pair. This is used to group key-value pairs together in a table.")
251
+ table_name: Optional[str] = Field(default=None, description="The name of the table the key-value pair belongs to. This is used to group key-value pairs together in a table.")
252
+ table_row_index: Optional[int] = Field(default=None, description="The index of the row in the table the key-value pair belongs to. This is used to group key-value pairs together in a table.")
253
+
166
254
  class DocProcTask(StrEnum):
167
255
  '''
168
256
  Possible names for the Document processing task parameter
169
257
  '''
170
258
  text_extraction = auto()
171
259
 
172
- class DocProcSpec(NodeSpec):
260
+ class DocProcSpec(DocProcCommonNodeSpec):
173
261
  task: DocProcTask = Field(description='The document processing operation name', default=DocProcTask.text_extraction)
262
+ kvp_schema: List[DocProcKVPSchema] | None = Field(
263
+ title='KVP schemas',
264
+ description="Optional list of key-value pair schemas to use for extraction.",
265
+ default=None)
174
266
 
175
267
  def __init__(self, **data):
176
268
  super().__init__(**data)
@@ -673,6 +765,7 @@ class TaskEventType(Enum):
673
765
  ON_TASK_END = "task:on_task_end"
674
766
  ON_TASK_STREAM = "task:on_task_stream"
675
767
  ON_TASK_ERROR = "task:on_task_error"
768
+ ON_TASK_RESUME= "task:on_task_resume"
676
769
 
677
770
  class FlowData(BaseModel):
678
771
  '''This class represents the data that is passed between tasks in a flow.'''
@@ -707,7 +800,7 @@ class FlowEventType(Enum):
707
800
  ON_FLOW_START = "flow:on_flow_start"
708
801
  ON_FLOW_END = "flow:on_flow_end"
709
802
  ON_FLOW_ERROR = "flow:on_flow_error"
710
-
803
+ ON_FLOW_RESUME = "flow:on_flow_resume"
711
804
 
712
805
  @dataclass
713
806
  class FlowEvent:
@@ -743,41 +836,67 @@ class LanguageCode(StrEnum):
743
836
  fr = auto()
744
837
  en_hw = auto()
745
838
 
746
- class File(BaseModel):
839
+
840
+ class File(str):
841
+ @classmethod
842
+ def __get_pydantic_core_schema__(
843
+ cls, source_type: Any, handler: GetCoreSchemaHandler
844
+ ) -> core_schema.CoreSchema:
845
+ return core_schema.no_info_wrap_validator_function(
846
+ cls.validate,
847
+ core_schema.str_schema(),
848
+ serialization=core_schema.plain_serializer_function_ser_schema(lambda v: str(v))
849
+ )
850
+
851
+ @classmethod
852
+ def validate(cls, value: Any) -> "File":
853
+ if not isinstance(value, str):
854
+ raise TypeError("File must be a document reference (string)")
855
+ return cls(value)
856
+
857
+ @classmethod
858
+ def __get_pydantic_json_schema__(
859
+ cls, core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler
860
+ ) -> JsonSchemaValue:
861
+ return {
862
+ "type": "string",
863
+ "title": "Document reference",
864
+ "format": "binary",
865
+ "description": "Either an ID or a URL identifying the document to be used.",
866
+ "wrap_data": False,
867
+ "required": []
868
+ }
869
+
870
+ class DocExtInput(BaseModel):
871
+ document_ref: bytes | File = Field(description="Either an ID or a URL identifying the document to be used.", title='Document reference', default=None, json_schema_extra={"format": "binary"})
872
+
873
+
874
+ class DocProcInput(BaseModel):
747
875
  '''
748
876
  This class represents the input of a Document processing task.
749
877
 
750
878
  Attributes:
751
879
  document_ref (bytes|str): This is either a URL to the location of the document bytes or an ID that we use to resolve the location of the document
752
880
  language (LanguageCode): Optional language code used when processing the input document
881
+ kvp_schemas (List[DocProcKVPSchema]): Optional list of key-value pair schemas to use for extraction. If not provided or None, no KVPs will be extracted. If an empty list is provided, we will use the internal schemas to extract KVPS.
753
882
  '''
754
883
  # This is declared as bytes but the runtime will understand if a URL is send in as input.
755
884
  # We need to use bytes here for Chat-with-doc to recognize the input as a File.
756
- document_ref: bytes | str = Field(
757
- description="Either an ID or a URL identifying the document to be used.",
758
- title='Document reference',
759
- default=None,
760
- json_schema_extra={"format": "binary"})
761
- language: Optional[LanguageCode] = Field(
762
- description='Optional language code of the document, defaults to "en"',
763
- title='Document language code',
764
- default=LanguageCode.en)
765
-
766
- class TextExtraction(BaseModel):
767
- '''
768
- This class represents the output generated by a "text_extraction" document processing (docproc) operation.
769
- Attributes:
770
- text (str): the text extracted from the input document.
771
- '''
772
- text: str = Field(description='The text extracted from the input document', title='Text extraction')
885
+ document_ref: bytes | File = Field(description="Either an ID or a URL identifying the document to be used.", title='Document reference', default=None, json_schema_extra={"format": "binary"})
886
+ kvp_schemas: Optional[List[DocProcKVPSchema]] = Field(
887
+ title='KVP schemas',
888
+ description="Optional list of key-value pair schemas to use for extraction.",
889
+ default=None)
773
890
 
774
891
  class TextExtractionResponse(BaseModel):
775
892
  '''
776
893
  The text extraction operation response.
777
894
  Attributes:
778
- output (TextExtraction): a wrapper for the text extraction response
895
+ text (str): the text extracted from the input document.
896
+ kvps (Optional[list[DocProcKVP]]): A list of key-value pairs extracted from the document. If no KVPs were extracted, this will be None.
779
897
  '''
780
- output: TextExtraction = Field(description='The text extraction response')
898
+ text: str = Field(description='The text extracted from the input document', title='text')
899
+ kvps: Optional[list[DocProcKVP]] = Field(description="A list of key-value pairs extracted from the document.", default=None)
781
900
 
782
901
 
783
902
  class DecisionsCondition(BaseModel):
@@ -7,7 +7,6 @@ from pydantic import BaseModel, TypeAdapter
7
7
 
8
8
  from langchain_core.utils.json_schema import dereference_refs
9
9
  import typer
10
- import yaml
11
10
 
12
11
  from ibm_watsonx_orchestrate.agent_builder.tools.base_tool import BaseTool
13
12
  from ibm_watsonx_orchestrate.agent_builder.tools.flow_tool import create_flow_json_tool
@@ -90,9 +89,13 @@ def _get_tool_request_body(schema_obj: JsonSchemaObject) -> ToolRequestBody:
90
89
  request_obj = ToolRequestBody(type='object', properties=schema_obj.properties, required=schema_obj.required)
91
90
  if schema_obj.model_extra:
92
91
  request_obj.__pydantic_extra__ = schema_obj.model_extra
93
- else: # we need to wrap a simple type with an object
94
- request_obj = ToolRequestBody(type='object', properties={}, required=[])
95
- request_obj.properties["data"] = schema_obj
92
+ else:
93
+ if schema_obj.wrap_data:
94
+ # we need to wrap a simple type with an object
95
+ request_obj = ToolRequestBody(type='object', properties={}, required=[])
96
+ request_obj.properties["data"] = schema_obj
97
+ else:
98
+ request_obj = ToolRequestBody(type=schema_obj.type, title=schema_obj.title, description=schema_obj.description, format=schema_obj.format)
96
99
  if schema_obj.model_extra:
97
100
  request_obj.__pydantic_extra__ = schema_obj.model_extra
98
101
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ibm-watsonx-orchestrate
3
- Version: 1.8.1
3
+ Version: 1.9.0
4
4
  Summary: IBM watsonx.orchestrate SDK
5
5
  Author-email: IBM <support@ibm.com>
6
6
  License: MIT License