kodexa 7.0.12399293688__py3-none-any.whl → 7.4.5a13228665254__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kodexa/model/objects.py CHANGED
@@ -631,7 +631,7 @@ class ProjectGuidance(BaseModel):
631
631
  description: Optional[str] = None
632
632
  guidance: Optional[List[Guidance]] = Field(None)
633
633
  active_store: bool = Field(False, alias="activeStore")
634
- storage: GuidanceSetStorage = Field(None, description="The storage for the guidance set")
634
+ storage: Optional[GuidanceSetStorage]= Field(None, description="The storage for the guidance set")
635
635
  template_ref: Optional[str] = Field(None, alias="templateRef")
636
636
  ref: Optional[str] = None
637
637
 
@@ -822,7 +822,9 @@ class SelectionOption(BaseModel):
822
822
  label: Optional[str] = None
823
823
  id: Optional[str] = None
824
824
  description: Optional[str] = None
825
- lexical_relations: Optional[List[LexicalRelation]] = Field([], alias="lexicalRelations")
825
+ lexical_relations: Optional[List[LexicalRelation]] = Field(default_factory=list, alias="lexicalRelations")
826
+ is_conditional: Optional[bool] = Field(None, alias="isConditional")
827
+ conditional_formula: Optional[str] = Field(None, alias="conditionalFormula")
826
828
 
827
829
 
828
830
  class SlugBasedMetadata1(BaseModel):
@@ -2645,6 +2647,8 @@ class Taxon(BaseModel):
2645
2647
  expression: Optional[str] = None
2646
2648
  enable_fallback_expression: Optional[bool] = Field(None, alias="enableFallbackExpression")
2647
2649
  fallback_expression: Optional[str] = Field(None, alias="fallbackExpression")
2650
+ enable_serialization_expression: Optional[bool] = Field(None, alias="enableSerializationExpression")
2651
+ serialization_expression: Optional[str] = Field(None, alias="serializationExpression")
2648
2652
  nullable: Optional[bool] = None
2649
2653
  null_value: Optional[str] = Field(None, alias="nullValue")
2650
2654
  denormalize_to_children: Optional[bool] = Field(False, alias="denormalizeToChildren")
@@ -2729,6 +2733,16 @@ class Taxon(BaseModel):
2729
2733
  structure["taxonType"] = self.taxon_type
2730
2734
  return structure
2731
2735
 
2736
def get_taxon_by_path(self, path):
    """
    Find the taxon with the given path in this taxon's subtree.

    The taxon itself is checked first, then each child is searched
    recursively (depth first) in the order the children are stored.

    Args:
        path: The path to look for; compared with each taxon's ``path``
            attribute using ``==``.

    Returns:
        The first matching taxon, or None when nothing in this subtree
        has that path.
    """
    if self.path == path:
        return self

    # ``children`` may be None or empty; both mean there is nothing to search.
    for child in self.children or ():
        match = child.get_taxon_by_path(path)
        # Test against None instead of truthiness so a match is never
        # discarded just because the object evaluates as falsy.
        if match is not None:
            return match

    return None
2745
+
2732
2746
 
2733
2747
  class ContentObject(BaseModel):
2734
2748
  """
@@ -2916,14 +2930,51 @@ class Project(BaseModel):
2916
2930
  owner: Optional[User] = None
2917
2931
  options: Optional[ProjectOptions] = Field(None, alias="options")
2918
2932
 
2919
-
# Lifecycle states a task can be in; each member's value equals its name.
class TaskStatus(str, Enum):
    TODO = "TODO"
    IN_PROGRESS = "IN_PROGRESS"
    DONE = "DONE"
2924
2937
 
2925
2938
 
2926
- class TaskCheckItem(BaseModel):
2939
# Kinds of activity entries that can be recorded against a task; each
# member's value equals its name.
class TaskActivityType(str, Enum):
    TASK_CREATED = "TASK_CREATED"
    TITLE_CHANGED = "TITLE_CHANGED"
    DESCRIPTION_UPDATED = "DESCRIPTION_UPDATED"
    STATUS_CHANGED = "STATUS_CHANGED"
    ASSIGNEE_CHANGED = "ASSIGNEE_CHANGED"
    DUE_DATE_CHANGED = "DUE_DATE_CHANGED"
    PROJECT_CHANGED = "PROJECT_CHANGED"
    COMMENT = "COMMENT"
2948
+
2949
+
2950
# Structured detail attached to a task activity: the activity type plus a
# free-form mapping of values (aliased to "interpolatedValues").
class TaskActivityDetail(BaseModel):
    model_config = ConfigDict(
        populate_by_name=True,
        use_enum_values=True,
        arbitrary_types_allowed=True,
        protected_namespaces=("model_config",),
    )

    # Required: no default is declared for the activity type.
    type: TaskActivityType
    interpolated_values: Dict[str, Any] = Field(
        default_factory=dict,
        alias="interpolatedValues",
    )
2959
+
2960
+
2961
# A single activity entry on a task. 'Task' and 'User' are forward
# references to models declared elsewhere in this module.
class TaskActivity(BaseModel):
    model_config = ConfigDict(
        populate_by_name=True,
        use_enum_values=True,
        arbitrary_types_allowed=True,
        protected_namespaces=("model_config",),
    )

    task: Optional['Task'] = None
    content: Optional[str] = None
    detail: Optional[TaskActivityDetail] = None
    user: Optional['User'] = None
    search_text: Optional[str] = Field(default=None, alias="searchText")
    transient_values: Dict[str, Any] = Field(
        default_factory=dict,
        alias="transientValues",
    )
2975
+
2976
+
2977
+ class TaskTag(BaseModel):
2927
2978
  model_config = ConfigDict(
2928
2979
  populate_by_name=True,
2929
2980
  use_enum_values=True,
@@ -2931,10 +2982,61 @@ class TaskCheckItem(BaseModel):
2931
2982
  protected_namespaces=("model_config",),
2932
2983
  )
2933
2984
 
2985
+ project: Optional['Project'] = None
2934
2986
  name: str
2987
+ color: Optional[str] = None
2988
+
2989
+
2990
# An action entry listed on a TemplateDataForm: optional type and label
# plus a free-form properties mapping that defaults to an empty dict.
class DataFormAction(BaseModel):
    model_config = ConfigDict(
        populate_by_name=True,
        use_enum_values=True,
        arbitrary_types_allowed=True,
        protected_namespaces=("model_config",),
    )

    type: Optional[str] = None
    label: Optional[str] = None
    properties: Dict[str, Any] = Field(default_factory=dict)
3001
+
3002
+
3003
# Pairs a data form reference (aliased to "dataFormRef") with the list of
# actions declared for it; the list defaults to empty.
class TemplateDataForm(BaseModel):
    model_config = ConfigDict(
        populate_by_name=True,
        use_enum_values=True,
        arbitrary_types_allowed=True,
        protected_namespaces=("model_config",),
    )

    data_form_ref: Optional[str] = Field(default=None, alias="dataFormRef")
    actions: List[DataFormAction] = Field(default_factory=list)
3013
+
3014
+
3015
# Metadata carried by a TaskTemplate: option definitions, data forms and an
# optional workspace id (aliased to "workspaceId"). Both lists default to
# empty.
class TaskTemplateMetadata(BaseModel):
    model_config = ConfigDict(
        populate_by_name=True,
        use_enum_values=True,
        arbitrary_types_allowed=True,
        protected_namespaces=("model_config",),
    )

    options: List[Option] = Field(default_factory=list)
    forms: List[TemplateDataForm] = Field(default_factory=list)
    workspace_id: Optional[str] = Field(default=None, alias="workspaceId")
3026
+
3027
+
3028
# A task template: optional owning project, name, description and
# template metadata. 'Project' is a forward reference to a model declared
# elsewhere in this module.
class TaskTemplate(BaseModel):
    model_config = ConfigDict(
        populate_by_name=True,
        use_enum_values=True,
        arbitrary_types_allowed=True,
        protected_namespaces=("model_config",),
    )

    project: Optional['Project'] = None
    name: Optional[str] = None
    description: Optional[str] = None
    metadata: Optional[TaskTemplateMetadata] = None
2938
3040
 
2939
3041
 
2940
3042
  class TaskMetadata(BaseModel):
@@ -2945,10 +3047,19 @@ class TaskMetadata(BaseModel):
2945
3047
  protected_namespaces=("model_config",),
2946
3048
  )
2947
3049
 
2948
- field_values: Dict[str, Any] = Field(default_factory=dict)
2949
- fields: List[Option] = Field(default_factory=list)
2950
- check_items: List[TaskCheckItem] = Field(default_factory=list)
2951
- document_store_ref: Optional[str] = None
3050
+ properties: Dict[str, Any] = Field(default_factory=dict)
3051
+
3052
+
3053
# Link between a task and a document family (aliased to "documentFamily").
# Both sides are optional forward references to models declared elsewhere.
class TaskDocumentFamily(BaseModel):
    model_config = ConfigDict(
        populate_by_name=True,
        use_enum_values=True,
        arbitrary_types_allowed=True,
        protected_namespaces=("model_config",),
    )

    task: Optional['Task'] = None
    document_family: Optional['DocumentFamily'] = Field(
        default=None,
        alias="documentFamily",
    )
2952
3063
 
2953
3064
 
2954
3065
  class Task(BaseModel):
@@ -2959,16 +3070,19 @@ class Task(BaseModel):
2959
3070
  protected_namespaces=("model_config",),
2960
3071
  )
2961
3072
 
2962
- project: Optional['Project'] = Field(None)
2963
- title: Optional[str] = Field(None)
2964
- template: Optional[bool] = Field(None)
2965
- description: Optional[str] = Field(None)
2966
- metadata: Optional['TaskMetadata'] = Field(None)
3073
+ project: Optional['Project'] = None
3074
+ title: Optional[str] = None
3075
+ description: Optional[str] = None
3076
+ metadata: Optional[TaskMetadata] = None
3077
+ template: Optional[TaskTemplate] = None
2967
3078
  due_date: Optional[StandardDateTime] = Field(None, alias="dueDate")
2968
3079
  completed_date: Optional[StandardDateTime] = Field(None, alias="completedDate")
2969
- status: Optional['TaskStatus'] = Field(None)
2970
- assignee: Optional['User'] = Field(None)
2971
-
3080
+ status: Optional[TaskStatus] = None
3081
+ assignee: Optional['User'] = None
3082
+ task_activity: List[TaskActivity] = Field(default_factory=list, alias="taskActivity")
3083
+ task_document_families: List[TaskDocumentFamily] = Field(default_factory=list, alias="taskDocumentFamilies")
3084
+ search_text: Optional[str] = Field(None, alias="searchText")
3085
+ tags: List[TaskTag] = Field(default_factory=list)
2972
3086
 
2973
3087
  class FeatureSet(BaseModel):
2974
3088
  """
@@ -4504,11 +4618,7 @@ class PageTaxonomy(BaseModel):
4504
4618
  last: Optional[bool] = None
4505
4619
  empty: Optional[bool] = None
4506
4620
 
4507
-
4508
4621
  class GuidanceTagResult(BaseModel):
4509
- """
4510
-
4511
- """
4512
4622
  value: Optional[str] = None
4513
4623
  line_uuid: Optional[str] = Field(None, alias="lineUuid")
4514
4624
 
@@ -4519,7 +4629,6 @@ class UserSelection(BaseModel):
4519
4629
 
4520
4630
 
4521
4631
  class GuidanceRelationEmbedding(BaseModel):
4522
-
4523
4632
  model_config = ConfigDict(
4524
4633
  populate_by_name=True,
4525
4634
  use_enum_values=True,
@@ -4533,7 +4642,10 @@ class GuidanceRelationEmbedding(BaseModel):
4533
4642
 
4534
4643
 
4535
4644
  class Guidance(BaseModel):
4536
- id: Optional[str] = Field(None, description="The ID of the guidance")
4645
+ """
4646
+ A guidance is a set of instructions and examples to guide taxonomies and extraction
4647
+ """
4648
+ id: Optional[str] = Field(None)
4537
4649
  name: Optional[str] = None
4538
4650
  guidance_type: Optional[str] = Field(None, alias="guidanceType")
4539
4651
  guidance_key: Optional[str] = Field(None, alias="guidanceKey")
@@ -4543,13 +4655,13 @@ class Guidance(BaseModel):
4543
4655
  document_name: Optional[str] = Field(None, alias="documentName")
4544
4656
  document_page: Optional[int] = Field(None, alias="documentPage")
4545
4657
  guidance_text: Optional[str] = Field(None, alias="guidanceText")
4546
- relation_embeddings: Optional[List[GuidanceRelationEmbedding]] = Field([], alias="relationEmbeddings")
4658
+ relation_embeddings: Optional[List[GuidanceRelationEmbedding]] = Field(None, alias="relationEmbeddings")
4547
4659
  summary: Optional[str] = None
4548
4660
  guidance_response: Optional[Dict[str, Any]] = Field(None, alias="guidanceResponse")
4549
- active: Optional[bool] = True
4661
+ active: bool = True
4550
4662
  applicable_tags: Optional[List[str]] = Field(None, alias="applicableTags")
4551
4663
  required_tags: Optional[List[str]] = Field(None, alias="requiredTags")
4552
- priority: Optional[int] = 1
4664
+ priority: int = 1
4553
4665
  user_instructions: Optional[str] = Field(None, alias="userInstructions")
4554
4666
  user_instructions_properties: Optional[Dict[str, Any]] = Field(None, alias="userInstructionsProperties")
4555
4667
  user_id: Optional[str] = Field(None, alias="userId")
@@ -4567,17 +4679,11 @@ class GuidanceEmbeddingType(Enum):
4567
4679
 
4568
4680
 
# Storage settings for a guidance set: embedding and summarization model
# ids, the summarize prompt (and whether a custom one is used), and the
# embedding types.
class GuidanceSetStorage(BaseModel):
    # Every field below declares a camelCase alias. Keeping
    # populate_by_name enabled lets callers keep constructing the model with
    # the snake_case attribute names as well as the aliases, as the earlier
    # definition of this model allowed.
    model_config = ConfigDict(populate_by_name=True)

    embedding_model_id: Optional[str] = Field(None, alias="embeddingModelId")
    summarize_model_id: Optional[str] = Field(None, alias="summarizeModelId")
    use_custom_summarize_prompt: Optional[bool] = Field(None, alias="useCustomSummarizePrompt")
    summarize_prompt: Optional[str] = Field(None, alias="summarizePrompt")
    embedding_types: Optional[List[GuidanceEmbeddingType]] = Field(None, alias="embeddingTypes")
4581
4687
 
4582
4688
 
4583
4689
  class GuidanceSet(ExtensionPackProvided):
@@ -4591,10 +4697,9 @@ class GuidanceSet(ExtensionPackProvided):
4591
4697
  protected_namespaces=("model_config",),
4592
4698
  )
4593
4699
 
4594
- active_store: bool = Field(False, alias="activeStore",
4595
- description="If true, allows guidance to be stored through the API")
4596
- storage: GuidanceSetStorage = Field(default_factory=GuidanceSetStorage)
4597
- guidance: List[Guidance] = Field(default_factory=list, description="The guidance in the set")
4700
+ active_store: bool = False
4701
+ storage: Optional[GuidanceSetStorage] = None
4702
+ guidance: Optional[List[Guidance]] = None
4598
4703
 
4599
4704
  def get_type(self) -> str:
4600
4705
  return "guidance"
@@ -5674,6 +5779,16 @@ class Taxonomy(ExtensionPackProvided):
5674
5779
  description="A list of references to an external data taxonomy",
5675
5780
  )
5676
5781
 
5782
def get_taxon_by_path(self, path):
    """
    Look up a taxon anywhere in this taxonomy by its path.

    Each top-level taxon is compared directly; when it has a ``children``
    value that is not None the search is delegated to that taxon's own
    ``get_taxon_by_path``.

    Args:
        path: The path to match against each taxon's ``path`` attribute.

    Returns:
        The first matching taxon, or None if no taxon has that path.
    """
    for candidate in self.taxons:
        if candidate.path == path:
            return candidate

        if candidate.children is None:
            continue

        nested_match = candidate.get_taxon_by_path(path)
        if nested_match is not None:
            return nested_match

    return None
5791
+
5677
5792
  def update_paths(self):
5678
5793
  for taxon in self.taxons:
5679
5794
  taxon.update_path()
@@ -1156,22 +1156,6 @@ class SqliteDocumentPersistence(object):
1156
1156
 
1157
1157
  return content_nodes
1158
1158
 
1159
- def __ensure_ed_table_exists(self):
1160
- """
1161
- Ensure the 'ed' table exists in the database.
1162
- Creates the table if it does not exist.
1163
- """
1164
- self.cursor.execute("""
1165
- CREATE TABLE IF NOT EXISTS ed (
1166
- obj BLOB
1167
- )
1168
- """)
1169
-
1170
- # Check if the table has any rows, if not, insert an initial empty row
1171
- result = self.cursor.execute("SELECT COUNT(*) FROM ed").fetchone()
1172
- if result[0] == 0:
1173
- self.cursor.execute("INSERT INTO ed (obj) VALUES (?)", [sqlite3.Binary(msgpack.packb({}))])
1174
-
1175
1159
  def __ensure_validations_table_exists(self):
1176
1160
  """
1177
1161
  Ensure the 'validations' table exists in the database.
@@ -1213,31 +1197,98 @@ class SqliteDocumentPersistence(object):
1213
1197
  return [DocumentTaxonValidation.model_validate(v) for v in msgpack.unpackb(result[0])]
1214
1198
  return []
1215
1199
 
1216
def set_external_data(self, external_data: dict, key: str = "default"):
    """
    Sets the external data for the document for a specific key.

    Any value already stored under the key is replaced, and the change is
    committed on the connection before returning.

    Args:
        external_data (dict): The external data to store; it is serialized
            with msgpack.
        key (str): The key to store the data under, defaults to "default"
    """
    self.__ensure_ed_table_exists()
    serialized_data = sqlite3.Binary(msgpack.packb(external_data))
    # ``key`` is the table's primary key, so a single upsert replaces the
    # previous DELETE-then-INSERT pair with one statement.
    self.cursor.execute(
        "INSERT OR REPLACE INTO ed (key, obj) VALUES (?, ?)",
        [key, serialized_data],
    )
    self.connection.commit()
1227
1213
 
1228
def get_external_data(self, key: str = "default") -> dict:
    """
    Gets the external data stored for this document under a specific key.

    Args:
        key (str): The key to retrieve data for, defaults to "default"

    Returns:
        dict: The unpacked data stored in the ed table for the key, or an
        empty dict when there is no row or the stored value is empty.
    """
    self.__ensure_ed_table_exists()
    row = self.cursor.execute("SELECT obj FROM ed WHERE key = ?", [key]).fetchone()
    # No row, or a row with an empty/NULL payload, both read as "no data".
    if not row or not row[0]:
        return {}
    return msgpack.unpackb(row[0])
1240
1229
 
1230
def get_external_data_keys(self) -> List[str]:
    """
    Lists every key that currently has a row in the ed table.

    Returns:
        List[str]: The keys, in the order the query returns them.
    """
    self.__ensure_ed_table_exists()
    key_rows = self.cursor.execute("SELECT key FROM ed").fetchall()
    # Each row is a one-column tuple; unwrap it to the bare key.
    return [stored_key for (stored_key,) in key_rows]
1240
+
1241
def __ensure_ed_table_exists(self):
    """
    Ensure the keyed 'ed' table exists in the database.

    Three situations are handled:
      * no 'ed' table: it is created with (key, obj) columns;
      * an 'ed' table without a 'key' column: its first row is read, the
        table is dropped and recreated with the keyed layout, and that row
        is re-inserted under the "default" key;
      * an 'ed' table that already has a 'key' column: nothing is done.

    In the first two situations a "default" row holding an empty packed
    dict is inserted if no "default" row exists afterwards. No commit is
    issued here.
    """
    existing_table = self.cursor.execute("""
        SELECT name FROM sqlite_master
        WHERE type='table' AND name='ed'
    """).fetchone()

    if not existing_table:
        # Create new table if it doesn't exist
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS ed (
                key TEXT PRIMARY KEY,
                obj BLOB
            )
        """)
    else:
        # Column name is the second field of each PRAGMA table_info row.
        columns = self.cursor.execute("PRAGMA table_info(ed)").fetchall()
        column_names = [column[1] for column in columns]
        if 'key' in column_names:
            # Already in the keyed layout - nothing to migrate or seed.
            return

        # Legacy layout: keep the first stored row, then rebuild the table.
        legacy_row = self.cursor.execute("SELECT obj FROM ed").fetchone()
        self.cursor.execute("DROP TABLE ed")
        self.cursor.execute("""
            CREATE TABLE ed (
                key TEXT PRIMARY KEY,
                obj BLOB
            )
        """)
        if legacy_row:
            self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)",
                                ["default", legacy_row[0]])

    # Seed an empty "default" entry when none is present.
    default_count = self.cursor.execute("SELECT COUNT(*) FROM ed WHERE key = 'default'").fetchone()
    if default_count[0] == 0:
        self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)",
                            ["default", sqlite3.Binary(msgpack.packb({}))])
1291
+
1241
1292
  def __ensure_steps_table_exists(self):
1242
1293
  """
1243
1294
  Ensure the 'steps' table exists in the database.
@@ -1436,22 +1487,31 @@ class PersistenceManager(object):
1436
1487
  def get_validations(self) -> list[DocumentTaxonValidation]:
1437
1488
  return self._underlying_persistence.get_validations()
1438
1489
 
1439
def get_external_data(self, key: str = "default") -> dict:
    """
    Gets the external data object associated with this document

    The call is forwarded to the underlying persistence.

    :param key: the key to retrieve data for, defaults to "default"
    :return: dict of the external data
    """
    return self._underlying_persistence.get_external_data(key)
1497
+
1498
def get_external_data_keys(self) -> List[str]:
    """
    Lists the keys under which external data is stored.

    The lookup is forwarded to the underlying persistence.

    Returns:
        List[str]: Whatever key list the underlying persistence reports.
    """
    stored_keys = self._underlying_persistence.get_external_data_keys()
    return stored_keys
1446
1506
 
1447
def set_external_data(self, external_data: dict, key: str = "default"):
    """
    Sets the external data for this document

    The call is forwarded to the underlying persistence.

    :param external_data: dict representing the external data, must be JSON serializable
    :param key: the key to store the data under, defaults to "default"
    :return:
    """
    self._underlying_persistence.set_external_data(external_data, key)
1455
1515
 
1456
1516
  def get_nodes_by_type(self, node_type: str) -> List[ContentNode]:
1457
1517
  """
@@ -124,10 +124,12 @@ class PipelineContext:
124
124
  self.content_provider = content_provider
125
125
  self.context: Dict = context
126
126
  self.stop_on_exception = True
127
- self.current_document = None
128
- self.document_family = None
129
- self.content_object = None
130
- self.document_store = None
127
+ self.current_document: Optional[Document] = None
128
+ from kodexa.platform.client import DocumentFamilyEndpoint
129
+ self.document_family:Optional[DocumentFamilyEndpoint] = None
130
+ self.content_object:Optional[ContentObject] = None
131
+ from kodexa.platform.client import DocumentStoreEndpoint
132
+ self.document_store:Optional[DocumentStoreEndpoint] = None
131
133
  self.status_handler = status_handler
132
134
  self.cancellation_handler = cancellation_handler
133
135