ibm-watsonx-orchestrate 1.9.0b2__py3-none-any.whl → 1.10.0b1__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (39)
  1. ibm_watsonx_orchestrate/__init__.py +2 -1
  2. ibm_watsonx_orchestrate/agent_builder/agents/types.py +2 -0
  3. ibm_watsonx_orchestrate/agent_builder/connections/__init__.py +1 -1
  4. ibm_watsonx_orchestrate/agent_builder/connections/connections.py +1 -1
  5. ibm_watsonx_orchestrate/agent_builder/connections/types.py +16 -12
  6. ibm_watsonx_orchestrate/agent_builder/knowledge_bases/types.py +47 -3
  7. ibm_watsonx_orchestrate/agent_builder/toolkits/types.py +18 -15
  8. ibm_watsonx_orchestrate/agent_builder/tools/python_tool.py +19 -7
  9. ibm_watsonx_orchestrate/agent_builder/tools/types.py +1 -1
  10. ibm_watsonx_orchestrate/agent_builder/voice_configurations/__init__.py +1 -0
  11. ibm_watsonx_orchestrate/agent_builder/voice_configurations/types.py +98 -0
  12. ibm_watsonx_orchestrate/cli/commands/agents/agents_command.py +20 -0
  13. ibm_watsonx_orchestrate/cli/commands/agents/agents_controller.py +170 -1
  14. ibm_watsonx_orchestrate/cli/commands/connections/connections_command.py +7 -7
  15. ibm_watsonx_orchestrate/cli/commands/connections/connections_controller.py +36 -26
  16. ibm_watsonx_orchestrate/cli/commands/knowledge_bases/knowledge_bases_controller.py +51 -22
  17. ibm_watsonx_orchestrate/cli/commands/server/server_command.py +110 -16
  18. ibm_watsonx_orchestrate/cli/commands/toolkit/toolkit_command.py +43 -10
  19. ibm_watsonx_orchestrate/cli/commands/toolkit/toolkit_controller.py +52 -25
  20. ibm_watsonx_orchestrate/cli/commands/tools/tools_controller.py +5 -0
  21. ibm_watsonx_orchestrate/cli/commands/voice_configurations/voice_configurations_command.py +58 -0
  22. ibm_watsonx_orchestrate/cli/commands/voice_configurations/voice_configurations_controller.py +173 -0
  23. ibm_watsonx_orchestrate/cli/main.py +2 -0
  24. ibm_watsonx_orchestrate/client/agents/agent_client.py +64 -1
  25. ibm_watsonx_orchestrate/client/connections/connections_client.py +4 -3
  26. ibm_watsonx_orchestrate/client/knowledge_bases/knowledge_base_client.py +4 -4
  27. ibm_watsonx_orchestrate/client/voice_configurations/voice_configurations_client.py +75 -0
  28. ibm_watsonx_orchestrate/docker/compose-lite.yml +54 -5
  29. ibm_watsonx_orchestrate/docker/default.env +21 -13
  30. ibm_watsonx_orchestrate/flow_builder/flows/__init__.py +2 -0
  31. ibm_watsonx_orchestrate/flow_builder/flows/flow.py +115 -31
  32. ibm_watsonx_orchestrate/flow_builder/node.py +39 -15
  33. ibm_watsonx_orchestrate/flow_builder/types.py +114 -25
  34. ibm_watsonx_orchestrate/run/connections.py +2 -2
  35. {ibm_watsonx_orchestrate-1.9.0b2.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/METADATA +1 -1
  36. {ibm_watsonx_orchestrate-1.9.0b2.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/RECORD +39 -34
  37. {ibm_watsonx_orchestrate-1.9.0b2.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/WHEEL +0 -0
  38. {ibm_watsonx_orchestrate-1.9.0b2.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/entry_points.txt +0 -0
  39. {ibm_watsonx_orchestrate-1.9.0b2.dist-info → ibm_watsonx_orchestrate-1.10.0b1.dist-info}/licenses/LICENSE +0 -0
@@ -25,13 +25,13 @@ from ibm_watsonx_orchestrate.client.tools.tool_client import ToolClient
25
25
  from ibm_watsonx_orchestrate.client.tools.tempus_client import TempusClient
26
26
  from ibm_watsonx_orchestrate.client.utils import instantiate_client
27
27
  from ..types import (
28
- EndNodeSpec, Expression, ForeachPolicy, ForeachSpec, LoopSpec, BranchNodeSpec, MatchPolicy, PromptLLMParameters, PromptNodeSpec,
28
+ DocProcKVPSchema, EndNodeSpec, Expression, ForeachPolicy, ForeachSpec, LoopSpec, BranchNodeSpec, MatchPolicy, PlainTextReadingOrder, PromptLLMParameters, PromptNodeSpec, TimerNodeSpec,
29
29
  StartNodeSpec, ToolSpec, JsonSchemaObject, ToolRequestBody, ToolResponseBody, UserFieldKind, UserFieldOption, UserFlowSpec, UserNodeSpec, WaitPolicy,
30
- DocProcSpec, TextExtractionResponse, DocProcInput, DecisionsNodeSpec, DecisionsRule, DocExtSpec, File
30
+ DocProcSpec, TextExtractionResponse, DocProcInput, DecisionsNodeSpec, DecisionsRule, DocExtSpec, File, DocumentClassificationResponse, DocClassifierSpec, DocumentProcessingCommonInput
31
31
  )
32
32
  from .constants import CURRENT_USER, START, END, ANY_USER
33
33
  from ..node import (
34
- EndNode, Node, PromptNode, StartNode, UserNode, AgentNode, DataMap, ToolNode, DocProcNode, DecisionsNode, DocExtNode
34
+ EndNode, Node, PromptNode, StartNode, UserNode, AgentNode, DataMap, ToolNode, DocProcNode, DecisionsNode, DocExtNode, DocClassifierNode
35
35
  )
36
36
  from ..types import (
37
37
  AgentNodeSpec, extract_node_spec, FlowContext, FlowEventType, FlowEvent, FlowSpec,
@@ -115,6 +115,10 @@ class Flow(Node):
115
115
  # we need a deep compare if the incoming schema and existing_schema is the same
116
116
  # pydantic suppport nested comparison by default
117
117
 
118
+ if isinstance(schema, dict):
119
+ # recast schema to support direct access
120
+ schema = JsonSchemaObject.model_validate(schema)
121
+
118
122
  schema.title = title
119
123
 
120
124
  if schema == existing_schema:
@@ -199,30 +203,31 @@ class Flow(Node):
199
203
  self._refactor_spec_to_schemaref(node.spec)
200
204
 
201
205
  def _refactor_spec_to_schemaref(self, spec: NodeSpec):
202
- if spec.input_schema and (spec.input_schema.type == "object" or spec.input_schema.type == "array") :
206
+ if spec.input_schema and not isinstance(spec.input_schema, SchemaRef) and (spec.input_schema.type == "object" or spec.input_schema.type == "array") :
203
207
  if isinstance(spec.input_schema, ToolRequestBody):
204
208
  spec.input_schema = self._add_schema_ref(JsonSchemaObject(type = spec.input_schema.type,
205
209
  properties= spec.input_schema.properties,
206
210
  required= spec.input_schema.required),
207
211
  f"{spec.name}_input")
208
- if spec.output_schema_object is not None and spec.output_schema_object.type == "object":
209
- spec.output_schema = self._add_schema_ref(spec.output_schema_object, spec.output_schema_object.title)
210
- spec.output_schema_object = None
211
- elif spec.output_schema is not None:
212
- if isinstance(spec.output_schema, ToolResponseBody):
213
- if spec.output_schema.type == "object":
214
- json_obj = JsonSchemaObject(type = spec.output_schema.type,
215
- description=spec.output_schema.description,
216
- properties= spec.output_schema.properties,
217
- items = spec.output_schema.items,
218
- uniqueItems=spec.output_schema.uniqueItems,
219
- anyOf=spec.output_schema.anyOf,
220
- required= spec.output_schema.required)
221
- spec.output_schema = self._add_schema_ref(json_obj, f"{spec.name}_output")
222
- elif spec.output_schema.type == "array":
223
- if hasattr(spec.output_schema, "items") and hasattr(spec.output_schema.items, "type") and spec.output_schema.items.type == "object":
224
- schema_ref = self._add_schema_ref(spec.output_schema.items)
225
- spec.output_schema.items = JsonSchemaObjectRef(ref=f"{schema_ref.ref}")
212
+ if not isinstance(spec.output_schema, SchemaRef):
213
+ if spec.output_schema_object is not None and spec.output_schema_object.type == "object":
214
+ spec.output_schema = self._add_schema_ref(spec.output_schema_object, spec.output_schema_object.title)
215
+ spec.output_schema_object = None
216
+ elif spec.output_schema is not None:
217
+ if isinstance(spec.output_schema, ToolResponseBody):
218
+ if spec.output_schema.type == "object":
219
+ json_obj = JsonSchemaObject(type = spec.output_schema.type,
220
+ description=spec.output_schema.description,
221
+ properties= spec.output_schema.properties,
222
+ items = spec.output_schema.items,
223
+ uniqueItems=spec.output_schema.uniqueItems,
224
+ anyOf=spec.output_schema.anyOf,
225
+ required= spec.output_schema.required)
226
+ spec.output_schema = self._add_schema_ref(json_obj, f"{spec.name}_output")
227
+ elif spec.output_schema.type == "array":
228
+ if hasattr(spec.output_schema, "items") and hasattr(spec.output_schema.items, "type") and spec.output_schema.items.type == "object":
229
+ schema_ref = self._add_schema_ref(spec.output_schema.items)
230
+ spec.output_schema.items = JsonSchemaObjectRef(ref=f"{schema_ref.ref}")
226
231
 
227
232
  # def refactor_datamap_spec_to_schemaref(self, spec: FnDataMapSpec):
228
233
  # '''TODO'''
@@ -433,23 +438,95 @@ class Flow(Node):
433
438
  node = self._add_node(node)
434
439
  return cast(PromptNode, node)
435
440
 
441
+ def docclassfier(self,
442
+ name: str,
443
+ llm : str = "watsonx/meta-llama/llama-3-2-90b-vision-instruct",
444
+ version: str = "TIP",
445
+ display_name: str| None = None,
446
+ classes: type[BaseModel]| None = None,
447
+ description: str | None = None,
448
+ min_confidence: float = 0.0,
449
+ input_map: DataMap = None) -> DocClassifierNode:
450
+
451
+ if name is None :
452
+ raise ValueError("name must be provided.")
453
+
454
+ doc_classifier_config = DocClassifierNode.generate_config(llm=llm, min_confidence=min_confidence,input_classes=classes)
455
+
456
+ input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = DocumentProcessingCommonInput)
457
+ output_schema_obj = _get_json_schema_obj(parameter_name = "output", type_def = DocumentClassificationResponse)
458
+
459
+ if "$defs" in output_schema_obj.model_extra:
460
+ output_schema_obj.model_extra.pop("$defs")
461
+ # Create the docclassifier spec
462
+ task_spec = DocClassifierSpec(
463
+ name=name,
464
+ display_name=display_name if display_name is not None else name,
465
+ description=description,
466
+ input_schema=_get_tool_request_body(input_schema_obj),
467
+ output_schema=_get_tool_response_body(output_schema_obj),
468
+ output_schema_object = output_schema_obj,
469
+ config=doc_classifier_config,
470
+ version=version
471
+ )
472
+ node = DocClassifierNode(spec=task_spec)
473
+
474
+ # setup input map
475
+ if input_map:
476
+ node.input_map = self._get_data_map(input_map)
477
+
478
+ # add the node to the list of node
479
+
480
+ node = self._add_node(node)
481
+ return cast(DocClassifierNode, node)
482
+
483
+
484
+ def timer(self,
485
+ name: str,
486
+ delay: int,
487
+ display_name: str | None = None,
488
+ description: str | None = None,
489
+ input_map: DataMap = None) -> Node:
490
+
491
+ if name is None:
492
+ raise ValueError("name must be provided.")
493
+ if delay < 0:
494
+ raise ValueError("delay must be non-negative.")
495
+
496
+ timer_spec = TimerNodeSpec(
497
+ name=name,
498
+ display_name=display_name if display_name is not None else name,
499
+ description=description,
500
+ delay=delay
501
+ )
502
+
503
+ node = Node(spec=timer_spec)
504
+
505
+ if input_map:
506
+ node.input_map = self._get_data_map(input_map)
507
+
508
+ node = self._add_node(node)
509
+ return node
510
+
511
+
436
512
  def docext(self,
437
513
  name: str,
438
- llm : str = "meta-llama/llama-3-2-11b-vision-instruct",
514
+ llm : str = "watsonx/meta-llama/llama-3-2-90b-vision-instruct",
439
515
  version: str = "TIP",
440
516
  display_name: str| None = None,
441
- input_entities: type[BaseModel]| None = None,
517
+ fields: type[BaseModel]| None = None,
442
518
  description: str | None = None,
443
- input_map: DataMap = None) -> tuple[DocExtNode, type[BaseModel]]:
519
+ input_map: DataMap = None,
520
+ enable_hw: bool = False) -> tuple[DocExtNode, type[BaseModel]]:
444
521
 
445
522
  if name is None :
446
523
  raise ValueError("name must be provided.")
447
524
 
448
- doc_ext_config = DocExtNode.generate_config(llm=llm, input_entites=input_entities)
525
+ doc_ext_config = DocExtNode.generate_config(llm=llm, fields=fields)
449
526
 
450
- DocExtFieldValue = DocExtNode.generate_docext_field_value_model(input_entities=input_entities)
527
+ DocExtFieldValue = DocExtNode.generate_docext_field_value_model(fields=fields)
451
528
 
452
- input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = File)
529
+ input_schema_obj = _get_json_schema_obj(parameter_name = "input", type_def = DocumentProcessingCommonInput)
453
530
  output_schema_obj = _get_json_schema_obj("output", DocExtFieldValue)
454
531
 
455
532
  if "$defs" in output_schema_obj.model_extra:
@@ -464,7 +541,8 @@ class Flow(Node):
464
541
  output_schema=_get_tool_response_body(output_schema_obj),
465
542
  output_schema_object = output_schema_obj,
466
543
  config=doc_ext_config,
467
- version=version
544
+ version=version,
545
+ enable_hw=enable_hw
468
546
  )
469
547
  node = DocExtNode(spec=task_spec)
470
548
 
@@ -523,9 +601,12 @@ class Flow(Node):
523
601
  def docproc(self,
524
602
  name: str,
525
603
  task: str,
604
+ plain_text_reading_order : PlainTextReadingOrder = PlainTextReadingOrder.block_structure,
526
605
  display_name: str|None=None,
527
606
  description: str | None = None,
528
- input_map: DataMap = None) -> DocProcNode:
607
+ input_map: DataMap = None,
608
+ kvp_schemas: list[DocProcKVPSchema] = None,
609
+ enable_hw: bool = False) -> DocProcNode:
529
610
 
530
611
  if name is None :
531
612
  raise ValueError("name must be provided.")
@@ -547,7 +628,10 @@ class Flow(Node):
547
628
  input_schema=_get_tool_request_body(input_schema_obj),
548
629
  output_schema=_get_tool_response_body(output_schema_obj),
549
630
  output_schema_object = output_schema_obj,
550
- task=task
631
+ task=task,
632
+ plain_text_reading_order=plain_text_reading_order,
633
+ enable_hw=enable_hw,
634
+ kvp_schemas=kvp_schemas
551
635
  )
552
636
 
553
637
  node = DocProcNode(spec=task_spec)
@@ -6,8 +6,8 @@ import yaml
6
6
  from pydantic import BaseModel, Field, SerializeAsAny, create_model
7
7
  from enum import Enum
8
8
 
9
- from .types import EndNodeSpec, NodeSpec, AgentNodeSpec, PromptNodeSpec, StartNodeSpec, ToolNodeSpec, UserFieldKind, UserFieldOption, UserNodeSpec, DocProcSpec, \
10
- DocExtSpec, DocExtConfig, LanguageCode, DecisionsNodeSpec
9
+ from .types import DocExtConfigField, EndNodeSpec, NodeSpec, AgentNodeSpec, PromptNodeSpec, TimerNodeSpec, StartNodeSpec, ToolNodeSpec, UserFieldKind, UserFieldOption, UserNodeSpec, DocProcSpec, \
10
+ DocExtSpec, DocExtConfig, DocClassifierSpec, DecisionsNodeSpec, DocClassifierConfig
11
11
 
12
12
  from .data_map import DataMap
13
13
 
@@ -120,6 +120,24 @@ class DocProcNode(Node):
120
120
  def get_spec(self) -> DocProcSpec:
121
121
  return cast(DocProcSpec, self.spec)
122
122
 
123
+ class DocClassifierNode(Node):
124
+ def __repr__(self):
125
+ return f"DocClassifierNode(name='{self.spec.name}', description='{self.spec.description}')"
126
+
127
+ def get_spec(self) -> DocClassifierSpec:
128
+ return cast(DocClassifierSpec, self.spec)
129
+
130
+ @staticmethod
131
+ def generate_config(llm: str, input_classes: type[BaseModel], min_confidence: float) -> DocClassifierConfig:
132
+ return DocClassifierConfig(llm=llm, classes=input_classes.__dict__.values(), min_confidence=min_confidence)
133
+
134
+ class TimerNode(Node):
135
+ def __repr__(self):
136
+ return f"TimerNode(name='{self.spec.name}', description='{self.spec.description}')"
137
+
138
+ def get_spec(self) -> TimerNodeSpec:
139
+ return cast(TimerNodeSpec, self.spec)
140
+
123
141
  class DocExtNode(Node):
124
142
  def __repr__(self):
125
143
  return f"DocExtNode(name='{self.spec.name}', description='{self.spec.description}')"
@@ -128,23 +146,29 @@ class DocExtNode(Node):
128
146
  return cast(DocExtSpec, self.spec)
129
147
 
130
148
  @staticmethod
131
- def generate_config(llm: str, input_entites: type[BaseModel]) -> DocExtConfig:
132
- entities = input_entites.__dict__.values()
133
- return DocExtConfig(llm=llm, entities=entities)
149
+ def generate_config(llm: str, fields: type[BaseModel]) -> DocExtConfig:
150
+ return DocExtConfig(llm=llm, fields=fields.__dict__.values())
134
151
 
135
152
  @staticmethod
136
- def generate_docext_field_value_model(input_entities: type[BaseModel]) -> type[BaseModel]:
153
+ def generate_docext_field_value_model(fields: type[BaseModel]) -> type[BaseModel]:
137
154
  create_field_value_description = lambda field_name: "Extracted value for " + field_name
155
+ field_definitions = {}
156
+
157
+ for name, value in fields.model_dump().items():
158
+ field_type = str
159
+ field_kwargs = {
160
+ "title": value['name'],
161
+ "description": create_field_value_description(value['name']),
162
+ "type": value["type"] if value["type"] != "date" else "string"
163
+ }
164
+
165
+ # Add json_schema_extra if type is 'date'
166
+ if value["type"] == "date":
167
+ field_kwargs["json_schema_extra"] = {"format": "date"}
168
+
169
+ field_definitions[name] = (field_type, Field(**field_kwargs))
138
170
 
139
- DocExtFieldValue = create_model(
140
- "DocExtFieldValue",
141
- **{
142
- name: (str, Field(
143
- title=value['name'],
144
- description=create_field_value_description(value['name']),
145
- )
146
- )
147
- for name, value in input_entities.model_dump().items()})
171
+ DocExtFieldValue = create_model("DocExtFieldValue", **field_definitions)
148
172
  return DocExtFieldValue
149
173
 
150
174
  class DecisionsNode(Node):
@@ -4,12 +4,16 @@ from datetime import date
4
4
  import numbers
5
5
  import inspect
6
6
  import logging
7
+ import uuid
8
+ import re
9
+ import time
7
10
  from typing import (
8
11
  Annotated, Any, Callable, Self, cast, Literal, List, NamedTuple, Optional, Sequence, Union, NewType
9
12
  )
10
13
  from typing_extensions import Doc
11
14
 
12
15
  import docstring_parser
16
+ from pydantic import computed_field, field_validator
13
17
  from pydantic import BaseModel, Field, GetCoreSchemaHandler, GetJsonSchemaHandler, RootModel
14
18
  from pydantic_core import core_schema
15
19
  from pydantic.json_schema import JsonSchemaValue
@@ -124,7 +128,7 @@ def _to_json_from_output_schema(schema: Union[ToolResponseBody, SchemaRef]) -> d
124
128
  return model_spec
125
129
 
126
130
  class NodeSpec(BaseModel):
127
- kind: Literal["node", "tool", "user", "agent", "flow", "start", "decisions", "prompt", "branch", "wait", "foreach", "loop", "userflow", "end", "docproc" ] = "node"
131
+ kind: Literal["node", "tool", "user", "agent", "flow", "start", "decisions", "prompt", "timer", "branch", "wait", "foreach", "loop", "userflow", "end", "docproc", "docext", "docclassifier" ] = "node"
128
132
  name: str
129
133
  display_name: str | None = None
130
134
  description: str | None = None
@@ -170,7 +174,7 @@ class NodeSpec(BaseModel):
170
174
 
171
175
  return model_spec
172
176
 
173
- class DocExtConfigEntity(BaseModel):
177
+ class DocExtConfigField(BaseModel):
174
178
  name: str = Field(description="Entity name")
175
179
  type: Literal["string", "date", "number"] = Field(default="string", description="The type of the entity values")
176
180
  description: str = Field(title="Description", description="Description of the entity", default="")
@@ -180,18 +184,89 @@ class DocExtConfigEntity(BaseModel):
180
184
  examples: list[str] = Field(title="Examples", description="Examples that help the LLM understand the expected entity mentions", default=[])
181
185
 
182
186
  class DocExtConfig(BaseModel):
183
- domain: str = Field(description="Domiain of the document", default="other")
187
+ domain: str = Field(description="Domain of the document", default="other")
184
188
  type: str = Field(description="Document type", default="agreement")
185
189
  llm: str = Field(description="The LLM used for the document extraction", default="meta-llama/llama-3-2-11b-vision-instruct")
186
- entities: list[DocExtConfigEntity] = Field(default=[])
190
+ fields: list[DocExtConfigField] = Field(default=[])
187
191
 
188
192
  class LanguageCode(StrEnum):
189
193
  en = auto()
190
194
  fr = auto()
191
195
 
196
+ class DocProcTask(StrEnum):
197
+ '''
198
+ Possible names for the Document processing task parameter
199
+ '''
200
+ text_extraction = auto()
201
+ custom_field_extraction = auto()
202
+ custom_document_classification = auto()
203
+
204
+ class CustomClassOutput(BaseModel):
205
+ class_name: str = Field(
206
+ title="Class Name",
207
+ description="Class Name of the Document",
208
+ default=[],
209
+ )
210
+
211
+ class DocumentClassificationResponse(BaseModel):
212
+ custom_class_response: CustomClassOutput = Field(
213
+ title="Custom Classification",
214
+ description="The Class extracted by the llm",
215
+ )
216
+
217
+ class DocClassifierClass(BaseModel):
218
+ class_name: str = Field(title='Class Name', description="The predicted, normalized document class name based on provided name")
219
+
220
+ @field_validator("class_name", mode="before")
221
+ @classmethod
222
+ def normalize_name(cls, name) -> str:
223
+ pattern = r'^[a-zA-Z0-9_]{1,29}$'
224
+ if not re.match(pattern, name):
225
+ raise ValueError(f"class_name \"{name}\" is not valid. class_name should contain only letters (a-z, A-Z), digits (0-9), and underscores (_)")
226
+ return name
227
+
228
+ @computed_field(description="A uuid for identifying classes, For easy filtering of documents classified in a class", return_type=str)
229
+ def class_id(self) -> str:
230
+ return str(uuid.uuid5(uuid.uuid1(), self.class_name + str(time.time())))
231
+
232
+ class DocClassifierConfig(BaseModel):
233
+ domain: str = Field(description="Domain of the document", default="other",title="Domain")
234
+ type: Literal["class_configuration"] = Field(description="Document type", default="class_configuration",title="Type")
235
+ llm: str = Field(description="The LLM used for the document classfier", default="watsonx/meta-llama/llama-3-2-11b-vision-instruct",title="LLM")
236
+ min_confidence: float = Field(description="The minimal confidence acceptable for an extracted field value", default=0.0,le=1.0, ge=0.0 ,title="Minimum Confidence")
237
+ classes: list[DocClassifierClass] = Field(default=[], description="Classes which are needed to classify provided by user", title="Classes")
238
+
192
239
  class DocProcCommonNodeSpec(NodeSpec):
240
+ task: DocProcTask = Field(description='The document processing operation name', default=DocProcTask.text_extraction)
193
241
  enable_hw: bool | None = Field(description="Boolean value indicating if hand-written feature is enabled.", title="Enable handwritten", default=False)
194
242
 
243
+ def __init__(self, **data):
244
+ super().__init__(**data)
245
+
246
+ def to_json(self) -> dict[str, Any]:
247
+ model_spec = super().to_json()
248
+ model_spec["task"] = self.task
249
+ model_spec["enable_hw"] = self.enable_hw
250
+
251
+ return model_spec
252
+
253
+
254
+
255
+ class DocClassifierSpec(DocProcCommonNodeSpec):
256
+ version : str = Field(description="A version of the spec")
257
+ config : DocClassifierConfig
258
+
259
+ def __init__(self, **data):
260
+ super().__init__(**data)
261
+ self.kind = "docclassifier"
262
+
263
+ def to_json(self) -> dict[str, Any]:
264
+ model_spec = super().to_json()
265
+ model_spec["version"] = self.version
266
+ model_spec["config"] = self.config.model_dump()
267
+ model_spec["task"] = DocProcTask.custom_document_classification
268
+ return model_spec
269
+
195
270
  class DocExtSpec(DocProcCommonNodeSpec):
196
271
  version : str = Field(description="A version of the spec")
197
272
  config : DocExtConfig
@@ -204,6 +279,7 @@ class DocExtSpec(DocProcCommonNodeSpec):
204
279
  model_spec = super().to_json()
205
280
  model_spec["version"] = self.version
206
281
  model_spec["config"] = self.config.model_dump()
282
+ model_spec["task"] = DocProcTask.custom_field_extraction
207
283
  return model_spec
208
284
 
209
285
  class DocProcField(BaseModel):
@@ -251,19 +327,17 @@ class DocProcKVP(BaseModel):
251
327
  table_name: Optional[str] = Field(default=None, description="The name of the table the key-value pair belongs to. This is used to group key-value pairs together in a table.")
252
328
  table_row_index: Optional[int] = Field(default=None, description="The index of the row in the table the key-value pair belongs to. This is used to group key-value pairs together in a table.")
253
329
 
254
- class DocProcTask(StrEnum):
255
- '''
256
- Possible names for the Document processing task parameter
257
- '''
258
- text_extraction = auto()
330
+ class PlainTextReadingOrder(StrEnum):
331
+ block_structure = auto()
332
+ simple_line = auto()
259
333
 
260
334
  class DocProcSpec(DocProcCommonNodeSpec):
261
- task: DocProcTask = Field(description='The document processing operation name', default=DocProcTask.text_extraction)
262
- kvp_schema: List[DocProcKVPSchema] | None = Field(
335
+ kvp_schemas: List[DocProcKVPSchema] | None = Field(
263
336
  title='KVP schemas',
264
337
  description="Optional list of key-value pair schemas to use for extraction.",
265
338
  default=None)
266
-
339
+ plain_text_reading_order : PlainTextReadingOrder = Field(default=PlainTextReadingOrder.block_structure)
340
+
267
341
  def __init__(self, **data):
268
342
  super().__init__(**data)
269
343
  self.kind = "docproc"
@@ -271,8 +345,12 @@ class DocProcSpec(DocProcCommonNodeSpec):
271
345
  def to_json(self) -> dict[str, Any]:
272
346
  model_spec = super().to_json()
273
347
  model_spec["task"] = self.task
348
+ if self.plain_text_reading_order != PlainTextReadingOrder.block_structure:
349
+ model_spec["plain_text_reading_order"] = self.plain_text_reading_order
350
+ if self.kvp_schemas is not None:
351
+ model_spec["kvp_schemas"] = self.kvp_schemas
274
352
  return model_spec
275
-
353
+
276
354
  class StartNodeSpec(NodeSpec):
277
355
  def __init__(self, **data):
278
356
  super().__init__(**data)
@@ -607,6 +685,18 @@ class PromptNodeSpec(NodeSpec):
607
685
 
608
686
  return model_spec
609
687
 
688
+ class TimerNodeSpec(NodeSpec):
689
+ delay: int
690
+
691
+ def __init__(self, **kwargs):
692
+ super().__init__(**kwargs)
693
+ self.kind = "timer"
694
+
695
+ def to_json(self) -> dict[str, Any]:
696
+ model_spec = super().to_json()
697
+ if self.delay:
698
+ model_spec["delay"] = self.delay
699
+ return model_spec
610
700
 
611
701
  class Expression(BaseModel):
612
702
  '''An expression could return a boolean or a value'''
@@ -866,24 +956,25 @@ class File(str):
866
956
  "wrap_data": False,
867
957
  "required": []
868
958
  }
869
-
870
- class DocExtInput(BaseModel):
871
- document_ref: bytes | File = Field(description="Either an ID or a URL identifying the document to be used.", title='Document reference', default=None, json_schema_extra={"format": "binary"})
959
+ class DocumentProcessingCommonInput(BaseModel):
960
+ '''
961
+ This class represents the common input of docext, docproc and docclassifier node
872
962
 
963
+ Attributes:
964
+ document_ref (bytes|str): This is either a URL to the location of the document bytes or an ID that we use to resolve the location of the document
965
+ '''
966
+ document_ref: bytes | File = Field(description="Either an ID or a URL identifying the document to be used.", title='Document reference', default=None, json_schema_extra={"format": "binary"})
873
967
 
874
- class DocProcInput(BaseModel):
968
+ class DocProcInput(DocumentProcessingCommonInput):
875
969
  '''
876
970
  This class represents the input of a Document processing task.
877
971
 
878
972
  Attributes:
879
- document_ref (bytes|str): This is either a URL to the location of the document bytes or an ID that we use to resolve the location of the document
880
- language (LanguageCode): Optional language code used when processing the input document
881
973
  kvp_schemas (List[DocProcKVPSchema]): Optional list of key-value pair schemas to use for extraction. If not provided or None, no KVPs will be extracted. If an empty list is provided, we will use the internal schemas to extract KVPS.
882
974
  '''
883
975
  # This is declared as bytes but the runtime will understand if a URL is send in as input.
884
976
  # We need to use bytes here for Chat-with-doc to recognize the input as a File.
885
- document_ref: bytes | File = Field(description="Either an ID or a URL identifying the document to be used.", title='Document reference', default=None, json_schema_extra={"format": "binary"})
886
- kvp_schemas: Optional[List[DocProcKVPSchema]] = Field(
977
+ kvp_schemas: Optional[List[DocProcKVPSchema]] | str = Field(
887
978
  title='KVP schemas',
888
979
  description="Optional list of key-value pair schemas to use for extraction.",
889
980
  default=None)
@@ -892,11 +983,9 @@ class TextExtractionResponse(BaseModel):
892
983
  '''
893
984
  The text extraction operation response.
894
985
  Attributes:
895
- text (str): the text extracted from the input document.
896
- kvps (Optional[list[DocProcKVP]]): A list of key-value pairs extracted from the document. If no KVPs were extracted, this will be None.
986
+ output_file_ref (str): The url to the file that contains the extracted text and kvps.
897
987
  '''
898
- text: str = Field(description='The text extracted from the input document', title='text')
899
- kvps: Optional[list[DocProcKVP]] = Field(description="A list of key-value pairs extracted from the document.", default=None)
988
+ output_file_ref: str = Field(description='The url to the file that contains the extracted text and kvps.', title="output_file_ref")
900
989
 
901
990
 
902
991
  class DecisionsCondition(BaseModel):
@@ -24,8 +24,8 @@ def oauth2_auth_code(app_id:str) -> OAuth2TokenCredentials:
24
24
  # def oauth2_implicit(app_id:str) -> BearerTokenAuthCredentials:
25
25
  # return get_application_connection_credentials(ConnectionType.OAUTH2_IMPLICIT, app_id=app_id)
26
26
 
27
- # def oauth2_password(app_id:str) -> BearerTokenAuthCredentials:
28
- # return get_application_connection_credentials(ConnectionType.OAUTH2_PASSWORD, app_id=app_id)
27
+ def oauth2_password(app_id:str) -> OAuth2TokenCredentials:
28
+ return get_application_connection_credentials(ConnectionType.OAUTH2_PASSWORD, app_id=app_id)
29
29
 
30
30
  def oauth2_client_creds(app_id:str) -> OAuth2TokenCredentials:
31
31
  return get_application_connection_credentials(ConnectionType.OAUTH2_CLIENT_CREDS, app_id=app_id)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ibm-watsonx-orchestrate
3
- Version: 1.9.0b2
3
+ Version: 1.10.0b1
4
4
  Summary: IBM watsonx.orchestrate SDK
5
5
  Author-email: IBM <support@ibm.com>
6
6
  License: MIT License