kodexa 7.0.12399109365__py3-none-any.whl → 7.4.5a13228665254__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodexa/dataclasses/__init__.py +85 -74
- kodexa/model/entities/product.py +59 -6
- kodexa/model/entities/product_group.py +126 -0
- kodexa/model/model.py +7 -4
- kodexa/model/objects.py +155 -40
- kodexa/model/persistence.py +96 -34
- kodexa/pipeline/pipeline.py +6 -4
- kodexa/platform/client.py +227 -14
- {kodexa-7.0.12399109365.dist-info → kodexa-7.4.5a13228665254.dist-info}/METADATA +1 -1
- {kodexa-7.0.12399109365.dist-info → kodexa-7.4.5a13228665254.dist-info}/RECORD +12 -11
- {kodexa-7.0.12399109365.dist-info → kodexa-7.4.5a13228665254.dist-info}/LICENSE +0 -0
- {kodexa-7.0.12399109365.dist-info → kodexa-7.4.5a13228665254.dist-info}/WHEEL +0 -0
kodexa/model/objects.py
CHANGED
@@ -631,7 +631,7 @@ class ProjectGuidance(BaseModel):
|
|
631
631
|
description: Optional[str] = None
|
632
632
|
guidance: Optional[List[Guidance]] = Field(None)
|
633
633
|
active_store: bool = Field(False, alias="activeStore")
|
634
|
-
storage: GuidanceSetStorage
|
634
|
+
storage: Optional[GuidanceSetStorage]= Field(None, description="The storage for the guidance set")
|
635
635
|
template_ref: Optional[str] = Field(None, alias="templateRef")
|
636
636
|
ref: Optional[str] = None
|
637
637
|
|
@@ -822,7 +822,9 @@ class SelectionOption(BaseModel):
|
|
822
822
|
label: Optional[str] = None
|
823
823
|
id: Optional[str] = None
|
824
824
|
description: Optional[str] = None
|
825
|
-
lexical_relations: Optional[List[LexicalRelation]] = Field(
|
825
|
+
lexical_relations: Optional[List[LexicalRelation]] = Field(default_factory=list, alias="lexicalRelations")
|
826
|
+
is_conditional: Optional[bool] = Field(None, alias="isConditional")
|
827
|
+
conditional_formula: Optional[str] = Field(None, alias="conditionalFormula")
|
826
828
|
|
827
829
|
|
828
830
|
class SlugBasedMetadata1(BaseModel):
|
@@ -2645,6 +2647,8 @@ class Taxon(BaseModel):
|
|
2645
2647
|
expression: Optional[str] = None
|
2646
2648
|
enable_fallback_expression: Optional[bool] = Field(None, alias="enableFallbackExpression")
|
2647
2649
|
fallback_expression: Optional[str] = Field(None, alias="fallbackExpression")
|
2650
|
+
enable_serialization_expression: Optional[bool] = Field(None, alias="enableSerializationExpression")
|
2651
|
+
serialization_expression: Optional[str] = Field(None, alias="serializationExpression")
|
2648
2652
|
nullable: Optional[bool] = None
|
2649
2653
|
null_value: Optional[str] = Field(None, alias="nullValue")
|
2650
2654
|
denormalize_to_children: Optional[bool] = Field(False, alias="denormalizeToChildren")
|
@@ -2729,6 +2733,16 @@ class Taxon(BaseModel):
|
|
2729
2733
|
structure["taxonType"] = self.taxon_type
|
2730
2734
|
return structure
|
2731
2735
|
|
2736
|
+
def get_taxon_by_path(self, path: str) -> Optional["Taxon"]:
    """
    Find the taxon with the given path within this taxon's subtree.

    The taxon itself is checked first; its children are then searched
    recursively, in order, and the first match is returned.

    Args:
        path: The taxon path to look for.

    Returns:
        The matching taxon, or None when neither this taxon nor any of its
        descendants has that path.
    """
    if self.path == path:
        return self

    # children can be None or empty; either way there is nothing to descend into
    for child in self.children or []:
        match = child.get_taxon_by_path(path)
        # Explicit None comparison (as Taxonomy.get_taxon_by_path does) so a
        # found taxon is never discarded because of its truth value.
        if match is not None:
            return match

    return None
|
2745
|
+
|
2732
2746
|
|
2733
2747
|
class ContentObject(BaseModel):
|
2734
2748
|
"""
|
@@ -2916,14 +2930,51 @@ class Project(BaseModel):
|
|
2916
2930
|
owner: Optional[User] = None
|
2917
2931
|
options: Optional[ProjectOptions] = Field(None, alias="options")
|
2918
2932
|
|
2919
|
-
|
2920
2933
|
class TaskStatus(str, Enum):
|
2921
2934
|
TODO = "TODO"
|
2922
2935
|
IN_PROGRESS = "IN_PROGRESS"
|
2923
2936
|
DONE = "DONE"
|
2924
2937
|
|
2925
2938
|
|
2926
|
-
class
|
2939
|
+
class TaskActivityType(str, Enum):
|
2940
|
+
TASK_CREATED = "TASK_CREATED"
|
2941
|
+
TITLE_CHANGED = "TITLE_CHANGED"
|
2942
|
+
DESCRIPTION_UPDATED = "DESCRIPTION_UPDATED"
|
2943
|
+
STATUS_CHANGED = "STATUS_CHANGED"
|
2944
|
+
ASSIGNEE_CHANGED = "ASSIGNEE_CHANGED"
|
2945
|
+
DUE_DATE_CHANGED = "DUE_DATE_CHANGED"
|
2946
|
+
PROJECT_CHANGED = "PROJECT_CHANGED"
|
2947
|
+
COMMENT = "COMMENT"
|
2948
|
+
|
2949
|
+
|
2950
|
+
class TaskActivityDetail(BaseModel):
|
2951
|
+
model_config = ConfigDict(
|
2952
|
+
populate_by_name=True,
|
2953
|
+
use_enum_values=True,
|
2954
|
+
arbitrary_types_allowed=True,
|
2955
|
+
protected_namespaces=("model_config",),
|
2956
|
+
)
|
2957
|
+
type: TaskActivityType
|
2958
|
+
interpolated_values: Dict[str, Any] = Field(default_factory=dict, alias="interpolatedValues")
|
2959
|
+
|
2960
|
+
|
2961
|
+
class TaskActivity(BaseModel):
|
2962
|
+
model_config = ConfigDict(
|
2963
|
+
populate_by_name=True,
|
2964
|
+
use_enum_values=True,
|
2965
|
+
arbitrary_types_allowed=True,
|
2966
|
+
protected_namespaces=("model_config",),
|
2967
|
+
)
|
2968
|
+
|
2969
|
+
task: Optional['Task'] = None
|
2970
|
+
content: Optional[str] = None
|
2971
|
+
detail: Optional[TaskActivityDetail] = None
|
2972
|
+
user: Optional['User'] = None
|
2973
|
+
search_text: Optional[str] = Field(None, alias="searchText")
|
2974
|
+
transient_values: Dict[str, Any] = Field(default_factory=dict, alias="transientValues")
|
2975
|
+
|
2976
|
+
|
2977
|
+
class TaskTag(BaseModel):
|
2927
2978
|
model_config = ConfigDict(
|
2928
2979
|
populate_by_name=True,
|
2929
2980
|
use_enum_values=True,
|
@@ -2931,10 +2982,61 @@ class TaskCheckItem(BaseModel):
|
|
2931
2982
|
protected_namespaces=("model_config",),
|
2932
2983
|
)
|
2933
2984
|
|
2985
|
+
project: Optional['Project'] = None
|
2934
2986
|
name: str
|
2987
|
+
color: Optional[str] = None
|
2988
|
+
|
2989
|
+
|
2990
|
+
class DataFormAction(BaseModel):
|
2991
|
+
model_config = ConfigDict(
|
2992
|
+
populate_by_name=True,
|
2993
|
+
use_enum_values=True,
|
2994
|
+
arbitrary_types_allowed=True,
|
2995
|
+
protected_namespaces=("model_config",),
|
2996
|
+
)
|
2997
|
+
|
2998
|
+
type: Optional[str] = None
|
2999
|
+
label: Optional[str] = None
|
3000
|
+
properties: Dict[str, Any] = Field(default_factory=dict)
|
3001
|
+
|
3002
|
+
|
3003
|
+
class TemplateDataForm(BaseModel):
|
3004
|
+
model_config = ConfigDict(
|
3005
|
+
populate_by_name=True,
|
3006
|
+
use_enum_values=True,
|
3007
|
+
arbitrary_types_allowed=True,
|
3008
|
+
protected_namespaces=("model_config",),
|
3009
|
+
)
|
3010
|
+
|
3011
|
+
data_form_ref: Optional[str] = Field(None, alias="dataFormRef")
|
3012
|
+
actions: List[DataFormAction] = Field(default_factory=list)
|
3013
|
+
|
3014
|
+
|
3015
|
+
class TaskTemplateMetadata(BaseModel):
|
3016
|
+
model_config = ConfigDict(
|
3017
|
+
populate_by_name=True,
|
3018
|
+
use_enum_values=True,
|
3019
|
+
arbitrary_types_allowed=True,
|
3020
|
+
protected_namespaces=("model_config",),
|
3021
|
+
)
|
3022
|
+
|
3023
|
+
options: List[Option] = Field(default_factory=list)
|
3024
|
+
forms: List[TemplateDataForm] = Field(default_factory=list)
|
3025
|
+
workspace_id: Optional[str] = Field(None, alias="workspaceId")
|
3026
|
+
|
3027
|
+
|
3028
|
+
class TaskTemplate(BaseModel):
|
3029
|
+
model_config = ConfigDict(
|
3030
|
+
populate_by_name=True,
|
3031
|
+
use_enum_values=True,
|
3032
|
+
arbitrary_types_allowed=True,
|
3033
|
+
protected_namespaces=("model_config",),
|
3034
|
+
)
|
3035
|
+
|
3036
|
+
project: Optional['Project'] = None
|
3037
|
+
name: Optional[str] = None
|
2935
3038
|
description: Optional[str] = None
|
2936
|
-
|
2937
|
-
taxonomy_ref: Optional[str] = Field(None, alias="taxonomyRef")
|
3039
|
+
metadata: Optional[TaskTemplateMetadata] = None
|
2938
3040
|
|
2939
3041
|
|
2940
3042
|
class TaskMetadata(BaseModel):
|
@@ -2945,10 +3047,19 @@ class TaskMetadata(BaseModel):
|
|
2945
3047
|
protected_namespaces=("model_config",),
|
2946
3048
|
)
|
2947
3049
|
|
2948
|
-
|
2949
|
-
|
2950
|
-
|
2951
|
-
|
3050
|
+
properties: Dict[str, Any] = Field(default_factory=dict)
|
3051
|
+
|
3052
|
+
|
3053
|
+
class TaskDocumentFamily(BaseModel):
|
3054
|
+
model_config = ConfigDict(
|
3055
|
+
populate_by_name=True,
|
3056
|
+
use_enum_values=True,
|
3057
|
+
arbitrary_types_allowed=True,
|
3058
|
+
protected_namespaces=("model_config",),
|
3059
|
+
)
|
3060
|
+
|
3061
|
+
task: Optional['Task'] = None
|
3062
|
+
document_family: Optional['DocumentFamily'] = Field(None, alias="documentFamily")
|
2952
3063
|
|
2953
3064
|
|
2954
3065
|
class Task(BaseModel):
|
@@ -2959,16 +3070,19 @@ class Task(BaseModel):
|
|
2959
3070
|
protected_namespaces=("model_config",),
|
2960
3071
|
)
|
2961
3072
|
|
2962
|
-
project: Optional['Project'] =
|
2963
|
-
title: Optional[str] =
|
2964
|
-
|
2965
|
-
|
2966
|
-
|
3073
|
+
project: Optional['Project'] = None
|
3074
|
+
title: Optional[str] = None
|
3075
|
+
description: Optional[str] = None
|
3076
|
+
metadata: Optional[TaskMetadata] = None
|
3077
|
+
template: Optional[TaskTemplate] = None
|
2967
3078
|
due_date: Optional[StandardDateTime] = Field(None, alias="dueDate")
|
2968
3079
|
completed_date: Optional[StandardDateTime] = Field(None, alias="completedDate")
|
2969
|
-
status: Optional[
|
2970
|
-
assignee: Optional['User'] =
|
2971
|
-
|
3080
|
+
status: Optional[TaskStatus] = None
|
3081
|
+
assignee: Optional['User'] = None
|
3082
|
+
task_activity: List[TaskActivity] = Field(default_factory=list, alias="taskActivity")
|
3083
|
+
task_document_families: List[TaskDocumentFamily] = Field(default_factory=list, alias="taskDocumentFamilies")
|
3084
|
+
search_text: Optional[str] = Field(None, alias="searchText")
|
3085
|
+
tags: List[TaskTag] = Field(default_factory=list)
|
2972
3086
|
|
2973
3087
|
class FeatureSet(BaseModel):
|
2974
3088
|
"""
|
@@ -4504,11 +4618,7 @@ class PageTaxonomy(BaseModel):
|
|
4504
4618
|
last: Optional[bool] = None
|
4505
4619
|
empty: Optional[bool] = None
|
4506
4620
|
|
4507
|
-
|
4508
4621
|
class GuidanceTagResult(BaseModel):
|
4509
|
-
"""
|
4510
|
-
|
4511
|
-
"""
|
4512
4622
|
value: Optional[str] = None
|
4513
4623
|
line_uuid: Optional[str] = Field(None, alias="lineUuid")
|
4514
4624
|
|
@@ -4519,7 +4629,6 @@ class UserSelection(BaseModel):
|
|
4519
4629
|
|
4520
4630
|
|
4521
4631
|
class GuidanceRelationEmbedding(BaseModel):
|
4522
|
-
|
4523
4632
|
model_config = ConfigDict(
|
4524
4633
|
populate_by_name=True,
|
4525
4634
|
use_enum_values=True,
|
@@ -4533,7 +4642,10 @@ class GuidanceRelationEmbedding(BaseModel):
|
|
4533
4642
|
|
4534
4643
|
|
4535
4644
|
class Guidance(BaseModel):
|
4536
|
-
|
4645
|
+
"""
|
4646
|
+
A guidance is a set of instructions and examples to guide taxonomies and extraction
|
4647
|
+
"""
|
4648
|
+
id: Optional[str] = Field(None)
|
4537
4649
|
name: Optional[str] = None
|
4538
4650
|
guidance_type: Optional[str] = Field(None, alias="guidanceType")
|
4539
4651
|
guidance_key: Optional[str] = Field(None, alias="guidanceKey")
|
@@ -4543,13 +4655,13 @@ class Guidance(BaseModel):
|
|
4543
4655
|
document_name: Optional[str] = Field(None, alias="documentName")
|
4544
4656
|
document_page: Optional[int] = Field(None, alias="documentPage")
|
4545
4657
|
guidance_text: Optional[str] = Field(None, alias="guidanceText")
|
4546
|
-
relation_embeddings: Optional[List[GuidanceRelationEmbedding]] = Field(
|
4658
|
+
relation_embeddings: Optional[List[GuidanceRelationEmbedding]] = Field(None, alias="relationEmbeddings")
|
4547
4659
|
summary: Optional[str] = None
|
4548
4660
|
guidance_response: Optional[Dict[str, Any]] = Field(None, alias="guidanceResponse")
|
4549
|
-
active:
|
4661
|
+
active: bool = True
|
4550
4662
|
applicable_tags: Optional[List[str]] = Field(None, alias="applicableTags")
|
4551
4663
|
required_tags: Optional[List[str]] = Field(None, alias="requiredTags")
|
4552
|
-
priority:
|
4664
|
+
priority: int = 1
|
4553
4665
|
user_instructions: Optional[str] = Field(None, alias="userInstructions")
|
4554
4666
|
user_instructions_properties: Optional[Dict[str, Any]] = Field(None, alias="userInstructionsProperties")
|
4555
4667
|
user_id: Optional[str] = Field(None, alias="userId")
|
@@ -4567,17 +4679,11 @@ class GuidanceEmbeddingType(Enum):
|
|
4567
4679
|
|
4568
4680
|
|
4569
4681
|
class GuidanceSetStorage(BaseModel):
|
4570
|
-
|
4571
|
-
populate_by_name=True,
|
4572
|
-
use_enum_values=True,
|
4573
|
-
arbitrary_types_allowed=True,
|
4574
|
-
protected_namespaces=("model_config",),
|
4575
|
-
)
|
4576
|
-
|
4682
|
+
embedding_model_id: Optional[str] = Field(None, alias="embeddingModelId")
|
4577
4683
|
summarize_model_id: Optional[str] = Field(None, alias="summarizeModelId")
|
4684
|
+
use_custom_summarize_prompt: Optional[bool] = Field(None, alias="useCustomSummarizePrompt")
|
4578
4685
|
summarize_prompt: Optional[str] = Field(None, alias="summarizePrompt")
|
4579
|
-
|
4580
|
-
embedding_types: List[GuidanceEmbeddingType] = Field(default_factory=list, alias="embeddingTypes")
|
4686
|
+
embedding_types: Optional[List[GuidanceEmbeddingType]] = Field(None, alias="embeddingTypes")
|
4581
4687
|
|
4582
4688
|
|
4583
4689
|
class GuidanceSet(ExtensionPackProvided):
|
@@ -4591,10 +4697,9 @@ class GuidanceSet(ExtensionPackProvided):
|
|
4591
4697
|
protected_namespaces=("model_config",),
|
4592
4698
|
)
|
4593
4699
|
|
4594
|
-
active_store: bool =
|
4595
|
-
|
4596
|
-
|
4597
|
-
guidance: List[Guidance] = Field(default_factory=list, description="The guidance in the set")
|
4700
|
+
active_store: bool = False
|
4701
|
+
storage: Optional[GuidanceSetStorage] = None
|
4702
|
+
guidance: Optional[List[Guidance]] = None
|
4598
4703
|
|
4599
4704
|
def get_type(self) -> str:
|
4600
4705
|
return "guidance"
|
@@ -5674,6 +5779,16 @@ class Taxonomy(ExtensionPackProvided):
|
|
5674
5779
|
description="A list of references to an external data taxonomy",
|
5675
5780
|
)
|
5676
5781
|
|
5782
|
+
def get_taxon_by_path(self, path: str) -> Optional["Taxon"]:
    """
    Find the taxon with the given path anywhere in this taxonomy.

    Each top-level taxon is searched in order; the search of a taxon covers
    the taxon itself and then its descendants.

    Args:
        path: The taxon path to look for.

    Returns:
        The first matching taxon, or None when no taxon has that path.
    """
    # Tolerate taxons being None or empty instead of raising TypeError.
    for taxon in self.taxons or []:
        # Taxon.get_taxon_by_path compares the taxon's own path before it
        # descends into children, so no separate path check is needed here.
        match = taxon.get_taxon_by_path(path)
        if match is not None:
            return match
    return None
|
5791
|
+
|
5677
5792
|
def update_paths(self):
|
5678
5793
|
for taxon in self.taxons:
|
5679
5794
|
taxon.update_path()
|
kodexa/model/persistence.py
CHANGED
@@ -111,9 +111,10 @@ class SqliteDocumentPersistence(object):
|
|
111
111
|
|
112
112
|
self.cursor = self.connection.cursor()
|
113
113
|
self.cursor.execute("PRAGMA journal_mode=OFF")
|
114
|
-
self.cursor.execute("
|
115
|
-
self.cursor.execute("
|
116
|
-
self.cursor.execute("
|
114
|
+
self.cursor.execute("PRAGMA temp_store=MEMORY")
|
115
|
+
self.cursor.execute("PRAGMA mmap_size=30000000000")
|
116
|
+
self.cursor.execute("PRAGMA cache_size=10000")
|
117
|
+
self.cursor.execute("PRAGMA page_size=4096")
|
117
118
|
|
118
119
|
try:
|
119
120
|
# We need to populate node_type_id_by_name
|
@@ -865,10 +866,11 @@ class SqliteDocumentPersistence(object):
|
|
865
866
|
self.connection.commit()
|
866
867
|
self.cursor.execute("VACUUM")
|
867
868
|
self.cursor = self.connection.cursor()
|
868
|
-
self.cursor.execute("
|
869
|
-
self.cursor.execute("
|
870
|
-
self.cursor.execute("
|
871
|
-
self.cursor.execute("
|
869
|
+
self.cursor.execute("PRAGMA journal_mode=OFF")
|
870
|
+
self.cursor.execute("PRAGMA temp_store=MEMORY")
|
871
|
+
self.cursor.execute("PRAGMA mmap_size=30000000000")
|
872
|
+
self.cursor.execute("PRAGMA cache_size=10000")
|
873
|
+
self.cursor.execute("PRAGMA page_size=4096")
|
872
874
|
|
873
875
|
def dump_in_memory_db_to_file(self):
|
874
876
|
# Connect to a new or existing database file
|
@@ -1154,22 +1156,6 @@ class SqliteDocumentPersistence(object):
|
|
1154
1156
|
|
1155
1157
|
return content_nodes
|
1156
1158
|
|
1157
|
-
def __ensure_ed_table_exists(self):
|
1158
|
-
"""
|
1159
|
-
Ensure the 'ed' table exists in the database.
|
1160
|
-
Creates the table if it does not exist.
|
1161
|
-
"""
|
1162
|
-
self.cursor.execute("""
|
1163
|
-
CREATE TABLE IF NOT EXISTS ed (
|
1164
|
-
obj BLOB
|
1165
|
-
)
|
1166
|
-
""")
|
1167
|
-
|
1168
|
-
# Check if the table has any rows, if not, insert an initial empty row
|
1169
|
-
result = self.cursor.execute("SELECT COUNT(*) FROM ed").fetchone()
|
1170
|
-
if result[0] == 0:
|
1171
|
-
self.cursor.execute("INSERT INTO ed (obj) VALUES (?)", [sqlite3.Binary(msgpack.packb({}))])
|
1172
|
-
|
1173
1159
|
def __ensure_validations_table_exists(self):
|
1174
1160
|
"""
|
1175
1161
|
Ensure the 'validations' table exists in the database.
|
@@ -1211,31 +1197,98 @@ class SqliteDocumentPersistence(object):
|
|
1211
1197
|
return [DocumentTaxonValidation.model_validate(v) for v in msgpack.unpackb(result[0])]
|
1212
1198
|
return []
|
1213
1199
|
|
1214
|
-
def set_external_data(self, external_data: dict):
|
1200
|
+
def set_external_data(self, external_data: dict, key: str = "default"):
    """
    Store external data for the document under a specific key.

    Any value previously stored under the same key is replaced.

    Args:
        external_data (dict): The external data to store, must be JSON serializable;
            it is packed with msgpack before being written.
        key (str): The key to store the data under, defaults to "default"
    """
    self.__ensure_ed_table_exists()

    packed = msgpack.packb(external_data)
    blob = sqlite3.Binary(packed)

    # Drop whatever is currently held for this key, then write the new value.
    self.cursor.execute("DELETE FROM ed WHERE key = ?", [key])
    self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)", [key, blob])

    self.connection.commit()
|
1225
1213
|
|
1226
|
-
def get_external_data(self) -> dict:
|
1214
|
+
def get_external_data(self, key: str = "default") -> dict:
    """
    Read the external data stored for this document under a specific key.

    Args:
        key (str): The key to retrieve data for, defaults to "default"

    Returns:
        dict: The unpacked external data held in the ed table for the key,
        or an empty dict when there is no row or the stored value is empty.
    """
    self.__ensure_ed_table_exists()

    row = self.cursor.execute("SELECT obj FROM ed WHERE key = ?", [key]).fetchone()

    # No row for the key, or a row with no payload: nothing to unpack.
    if not row or not row[0]:
        return {}

    return msgpack.unpackb(row[0])
|
1238
1229
|
|
1230
|
+
def get_external_data_keys(self) -> List[str]:
    """
    List every key that has external data stored in the ed table.

    Returns:
        List[str]: The keys, in the order the database returns them.
    """
    self.__ensure_ed_table_exists()

    key_rows = self.cursor.execute("SELECT key FROM ed").fetchall()

    keys = []
    for key_row in key_rows:
        # each row is a single-column tuple holding the key
        keys.append(key_row[0])
    return keys
|
1240
|
+
|
1241
|
+
def __ensure_ed_table_exists(self):
    """
    Make sure the keyed 'ed' table is available.

    Three situations are handled:
      * no 'ed' table: it is created with (key, obj) columns;
      * an 'ed' table without a 'key' column: the first row's obj is read,
        the table is dropped and recreated with the keyed layout, and the
        saved obj (if any) is re-inserted under the "default" key;
      * an 'ed' table that already has a 'key' column: left untouched.

    For a newly created or migrated table, an empty msgpack-encoded dict is
    inserted under "default" when no "default" row exists.
    """
    existing = self.cursor.execute("""
        SELECT name FROM sqlite_master
        WHERE type='table' AND name='ed'
    """).fetchone()

    if not existing:
        # Create new table if it doesn't exist
        self.cursor.execute("""
            CREATE TABLE IF NOT EXISTS ed (
                key TEXT PRIMARY KEY,
                obj BLOB
            )
        """)
    else:
        columns = self.cursor.execute("PRAGMA table_info(ed)").fetchall()
        # second field of each table_info row is the column name
        if any(column[1] == 'key' for column in columns):
            # Table exists and has key column - do nothing
            return

        # Keep the old payload, then rebuild the table with the key column
        legacy_row = self.cursor.execute("SELECT obj FROM ed").fetchone()
        self.cursor.execute("DROP TABLE ed")
        self.cursor.execute("""
            CREATE TABLE ed (
                key TEXT PRIMARY KEY,
                obj BLOB
            )
        """)

        # Carry the old payload over under the default key
        if legacy_row:
            self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)",
                                ["default", legacy_row[0]])

    # Seed an empty default entry when none is present
    default_count = self.cursor.execute("SELECT COUNT(*) FROM ed WHERE key = 'default'").fetchone()
    if default_count[0] == 0:
        self.cursor.execute("INSERT INTO ed (key, obj) VALUES (?, ?)",
                            ["default", sqlite3.Binary(msgpack.packb({}))])
|
1291
|
+
|
1239
1292
|
def __ensure_steps_table_exists(self):
|
1240
1293
|
"""
|
1241
1294
|
Ensure the 'steps' table exists in the database.
|
@@ -1434,22 +1487,31 @@ class PersistenceManager(object):
|
|
1434
1487
|
def get_validations(self) -> list[DocumentTaxonValidation]:
|
1435
1488
|
return self._underlying_persistence.get_validations()
|
1436
1489
|
|
1437
|
-
def get_external_data(self) -> dict:
|
1490
|
+
def get_external_data(self, key: str = "default") -> dict:
    """
    Gets the external data object associated with this document for a key

    Delegates to the underlying persistence layer.

    :param key: the key the external data is stored under, defaults to "default"
    :return: dict of the external data
    """
    return self._underlying_persistence.get_external_data(key)
|
1497
|
+
|
1498
|
+
def get_external_data_keys(self) -> List[str]:
    """
    Gets all keys under which external data is stored

    Delegates to the underlying persistence layer.

    :return: list of all keys that have external data stored
    """
    return self._underlying_persistence.get_external_data_keys()
|
1444
1506
|
|
1445
|
-
def set_external_data(self, external_data:dict):
|
1507
|
+
def set_external_data(self, external_data: dict, key: str = "default") -> None:
    """
    Sets the external data for this document under a key

    Delegates to the underlying persistence layer.

    :param external_data: dict representing the external data, must be JSON serializable
    :param key: the key to store the external data under, defaults to "default"
    :return:
    """
    self._underlying_persistence.set_external_data(external_data, key)
|
1453
1515
|
|
1454
1516
|
def get_nodes_by_type(self, node_type: str) -> List[ContentNode]:
|
1455
1517
|
"""
|
kodexa/pipeline/pipeline.py
CHANGED
@@ -124,10 +124,12 @@ class PipelineContext:
|
|
124
124
|
self.content_provider = content_provider
|
125
125
|
self.context: Dict = context
|
126
126
|
self.stop_on_exception = True
|
127
|
-
self.current_document = None
|
128
|
-
|
129
|
-
self.
|
130
|
-
self.
|
127
|
+
self.current_document: Optional[Document] = None
|
128
|
+
from kodexa.platform.client import DocumentFamilyEndpoint
|
129
|
+
self.document_family:Optional[DocumentFamilyEndpoint] = None
|
130
|
+
self.content_object:Optional[ContentObject] = None
|
131
|
+
from kodexa.platform.client import DocumentStoreEndpoint
|
132
|
+
self.document_store:Optional[DocumentStoreEndpoint] = None
|
131
133
|
self.status_handler = status_handler
|
132
134
|
self.cancellation_handler = cancellation_handler
|
133
135
|
|