kodexa-document 7.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodexa-document might be problematic. Click here for more details.
- kodexa_document/connectors.py +456 -0
- kodexa_document/model.py +3642 -0
- kodexa_document/persistence.py +2057 -0
- kodexa_document/persistence_models.py +421 -0
- kodexa_document/selectors/__init__.py +5 -0
- kodexa_document/selectors/ast.py +677 -0
- kodexa_document/selectors/error.py +29 -0
- kodexa_document/selectors/kodexa-ast-visitor.py +268 -0
- kodexa_document/selectors/parser.py +91 -0
- kodexa_document/selectors/resources/KodexaSelector.interp +99 -0
- kodexa_document/selectors/resources/KodexaSelector.tokens +56 -0
- kodexa_document/selectors/resources/KodexaSelectorLexer.interp +119 -0
- kodexa_document/selectors/resources/KodexaSelectorLexer.py +204 -0
- kodexa_document/selectors/resources/KodexaSelectorLexer.tokens +56 -0
- kodexa_document/selectors/resources/KodexaSelectorListener.py +570 -0
- kodexa_document/selectors/resources/KodexaSelectorParser.py +3246 -0
- kodexa_document/selectors/resources/KodexaSelectorVisitor.py +323 -0
- kodexa_document/selectors/visitor.py +265 -0
- kodexa_document/steps.py +109 -0
- kodexa_document-7.5.0.dist-info/METADATA +27 -0
- kodexa_document-7.5.0.dist-info/RECORD +22 -0
- kodexa_document-7.5.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
from peewee import *
|
|
2
|
+
import json
|
|
3
|
+
import datetime
|
|
4
|
+
import msgpack
|
|
5
|
+
from playhouse.sqlite_ext import JSONField, BlobField
|
|
6
|
+
|
|
7
|
+
# Shared database handle used by every model in this module. It is built with
# ``None`` so that no database file is bound at import time.
database = SqliteDatabase(None) # Will be initialized later with actual DB path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BaseModel(Model):
    """Common parent of the models in this module.

    Its only job is to attach subclasses to the module-level ``database``.
    """

    class Meta:
        # The right-hand side resolves to the module-level ``database`` object.
        database = database
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Taxonomy(BaseModel):
    """Model stored in the ``kddb_taxonomy`` table."""

    # Auto-incrementing integer primary key.
    id = AutoField()
    # Declared without ``null=True``.
    ref = TextField()

    class Meta:
        table_name = "kddb_taxonomy"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DataObject(BaseModel):
    """Model stored in the ``kddb_data_objects`` table.

    Rows can point at another row of the same table through ``parent`` and,
    optionally, at a ``Taxonomy`` row.
    """

    id = AutoField()
    # Self-referential link; the referenced row exposes ``children``.
    parent = ForeignKeyField(
        "self",
        null=True,
        backref="children",
        column_name="parent_id",
    )
    taxonomy = ForeignKeyField(
        Taxonomy,
        null=True,
        backref="data_objects",
        column_name="taxonomy_id",
    )
    idx = IntegerField(null=True)
    path = TextField(null=True)
    group_uuid = TextField(null=True)
    cell_index = IntegerField(null=True)
    source_ordering = TextField(null=True)
    # The callable itself is passed as the default, not a timestamp computed at
    # import time. Nothing in this class refreshes ``modified`` afterwards.
    created = DateTimeField(default=datetime.datetime.now)
    modified = DateTimeField(default=datetime.datetime.now)
    lineage = JSONField(null=True)
    deleted = BooleanField(default=False)

    class Meta:
        table_name = "kddb_data_objects"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class NodeType(BaseModel):
    """Model stored in the ``kddb_node_types`` table."""

    # Auto-incrementing integer primary key.
    id = AutoField()
    # Declared without ``null=True``.
    name = TextField()

    class Meta:
        table_name = "kddb_node_types"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ContentNode(BaseModel):
    """Model stored in the ``kddb_content_nodes`` table.

    Rows form a hierarchy through the self-referential ``parent`` key.
    """

    id = AutoField()
    # Self-referential link; the referenced row exposes ``children``.
    parent = ForeignKeyField(
        "self",
        null=True,
        backref="children",
        column_name="parent_id",
    )
    # Stored as plain text rather than as a foreign key to ``NodeType``.
    node_type = TextField()
    content = TextField(null=True)
    # The callable itself is passed as the default, not a timestamp computed at
    # import time. Nothing in this class refreshes ``modified`` afterwards.
    created = DateTimeField(default=datetime.datetime.now)
    modified = DateTimeField(default=datetime.datetime.now)
    index = IntegerField(null=True)

    class Meta:
        table_name = "kddb_content_nodes"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ContentNodePart(BaseModel):
    """Model stored in the ``kddb_content_node_parts`` table.

    Each row belongs to exactly one ``ContentNode`` (the key is not nullable).
    """

    id = AutoField()
    # Reachable from the owning node as ``parts``.
    content_node = ForeignKeyField(
        ContentNode,
        backref="parts",
        column_name="content_node_id",
    )
    pos = IntegerField()
    content = TextField(null=True)
    content_idx = IntegerField(null=True)

    class Meta:
        table_name = "kddb_content_node_parts"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ContentException(BaseModel):
    """Model stored in the ``kddb_content_exceptions`` table.

    The link to ``DataObject`` is optional.
    """

    id = AutoField()
    # Reachable from the data object as ``content_exceptions``.
    data_object = ForeignKeyField(
        DataObject,
        null=True,
        backref="content_exceptions",
        column_name="data_object_id",
    )
    message = TextField(null=True)
    exception_details = TextField(null=True)
    exception_type = TextField(null=True)
    severity = TextField(null=True)
    path = TextField(null=True)
    closing_comment = TextField(null=True)
    # New rows default to open.
    open = BooleanField(default=True)
    node_uuid = TextField(null=True)
    exception_type_id = TextField(null=True)

    class Meta:
        table_name = "kddb_content_exceptions"
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class FeatureType(BaseModel):
    """Model stored in the ``kddb_feature_types`` table."""

    # Auto-incrementing integer primary key.
    id = AutoField()
    # Declared without ``null=True``.
    name = TextField()

    class Meta:
        table_name = "kddb_feature_types"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class Feature(BaseModel):
    """Model stored in the ``kddb_features`` table.

    Every row references a ``FeatureType`` (the key is not nullable).
    """

    id = AutoField()
    # Reachable from the feature type as ``features``.
    feature_type = ForeignKeyField(
        FeatureType,
        backref="features",
        column_name="feature_type_id",
    )

    class Meta:
        table_name = "kddb_features"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class ContentNodeFeatureLink(BaseModel):
    """Model stored in the ``kddb_content_node_feature_links`` table.

    Joins a ``ContentNode`` to a ``Feature``; both keys are required.
    """

    id = AutoField()
    # Reachable from the node as ``feature_links``.
    content_node = ForeignKeyField(
        ContentNode,
        backref="feature_links",
        column_name="content_node_id",
    )
    # Reachable from the feature as ``content_node_links``.
    feature = ForeignKeyField(
        Feature,
        backref="content_node_links",
        column_name="feature_id",
    )

    class Meta:
        table_name = "kddb_content_node_feature_links"
        # ``True`` marks the composite index as unique, so a given
        # (content node, feature) pair can be stored only once.
        indexes = (
            (("content_node_id", "feature_id"), True),
        )
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class FeatureBlob(BaseModel):
    """Model stored in the ``kddb_feature_blob`` table: binary data for a ``Feature``."""

    id = AutoField()
    # Reachable from the feature as ``blobs``.
    feature = ForeignKeyField(
        Feature,
        backref="blobs",
        column_name="feature_id",
    )
    # Required binary column (no ``null=True``).
    binary_value = BlobField()

    class Meta:
        table_name = "kddb_feature_blob"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class FeatureBBox(BaseModel):
    """Model stored in the ``kddb_feature_bbox`` table: four floats tied to a ``Feature``."""

    id = AutoField()
    # Reachable from the feature as ``bboxes``.
    feature = ForeignKeyField(
        Feature,
        backref="bboxes",
        column_name="feature_id",
    )
    # All four values are required floats.
    # NOTE(review): presumably (x1, y1) and (x2, y2) are opposite corners of
    # the box; the corner convention is not visible here, so confirm.
    x1 = FloatField()
    y1 = FloatField()
    x2 = FloatField()
    y2 = FloatField()

    class Meta:
        table_name = "kddb_feature_bbox"
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class FeatureTag(BaseModel):
    """Model stored in the ``kddb_feature_tag`` table.

    Apart from the required ``feature`` key, every column is nullable.
    """

    id = AutoField()
    # Reachable from the feature as ``tags``.
    feature = ForeignKeyField(
        Feature,
        backref="tags",
        column_name="feature_id",
    )
    tag_value = TextField(null=True)
    start_pos = IntegerField(null=True)
    end_pos = IntegerField(null=True)
    uuid = TextField(null=True)
    data = BlobField(null=True)
    confidence = FloatField(null=True)
    group_uuid = TextField(null=True)
    parent_group_uuid = TextField(null=True)
    cell_index = IntegerField(null=True)
    index = IntegerField(null=True)
    note = TextField(null=True)
    status = TextField(null=True)
    owner_uri = TextField(null=True)
    # Stored as a nullable integer here, whereas ``TagMetadata.is_dirty`` is a
    # boolean that defaults to False.
    is_dirty = IntegerField(null=True)

    class Meta:
        table_name = "kddb_feature_tag"
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class DataAttribute(BaseModel):
    """Model stored in the ``kddb_data_attributes`` table.

    Each row belongs to a ``DataObject`` and may also reference a
    ``FeatureTag``.
    """

    id = AutoField()
    # Required owner; reachable from the data object as ``attributes``.
    data_object = ForeignKeyField(
        DataObject,
        backref="attributes",
        column_name="data_object_id",
    )
    # Optional link; reachable from the tag as ``data_attributes``.
    feature_tag = ForeignKeyField(
        FeatureTag,
        null=True,
        backref="data_attributes",
        column_name="feature_tag_id",
    )
    tag = TextField(null=True)
    value = TextField(null=True)
    string_value = TextField(null=True)
    path = TextField(null=True)
    owner_uri = TextField(null=True)
    type_at_creation = TextField(null=True)
    decimal_value = FloatField(null=True)
    # Declared as a nullable integer column, not a BooleanField.
    boolean_value = IntegerField(null=True)
    # The callable itself is passed as the default, not a timestamp computed at
    # import time. Nothing in this class refreshes ``modified`` afterwards.
    created = DateTimeField(default=datetime.datetime.now)
    modified = DateTimeField(default=datetime.datetime.now)
    confidence = FloatField(null=True)
    truncated = BooleanField(default=False)

    class Meta:
        table_name = "kddb_data_attributes"
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class DataException(BaseModel):
    """Model stored in the ``kddb_data_exceptions`` table.

    Each row belongs to a ``DataObject`` and may also reference a
    ``DataAttribute``.
    """

    id = AutoField()
    # Required owner; reachable from the data object as ``data_exceptions``.
    data_object = ForeignKeyField(
        DataObject,
        backref="data_exceptions",
        column_name="data_object_id",
    )
    # Optional link; reachable from the attribute as ``exceptions``.
    data_attribute = ForeignKeyField(
        DataAttribute,
        null=True,
        backref="exceptions",
        column_name="data_attribute_id",
    )
    message = TextField(null=True)
    exception_details = TextField(null=True)
    group_uuid = TextField(null=True)
    tag_uuid = TextField(null=True)
    exception_type = TextField(null=True)
    severity = TextField(null=True)
    path = TextField(null=True)
    closing_comment = TextField(null=True)
    # New rows default to open.
    open = BooleanField(default=True)

    class Meta:
        table_name = "kddb_data_exceptions"
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class TagMetadata(BaseModel):
    """Model stored in the ``kddb_tag_metadata`` table.

    Both foreign keys (to ``DataObject`` and ``DataAttribute``) are optional.
    """

    id = AutoField()
    # Reachable from the data object as ``tag_metadata``.
    data_object = ForeignKeyField(
        DataObject,
        null=True,
        backref="tag_metadata",
        column_name="data_object_id",
    )
    # Reachable from the attribute as ``tag_metadata``.
    data_attribute = ForeignKeyField(
        DataAttribute,
        null=True,
        backref="tag_metadata",
        column_name="data_attribute_id",
    )
    uuid = TextField(null=True)
    group_uuid = TextField(null=True)
    parent_group_uuid = TextField(null=True)
    start_pos = IntegerField(null=True)
    end_pos = IntegerField(null=True)
    confidence = FloatField(null=True)
    note = TextField(null=True)
    status = TextField(null=True)
    owner_uri = TextField(null=True)
    # Boolean with a False default, whereas ``FeatureTag.is_dirty`` is a
    # nullable integer.
    is_dirty = BooleanField(default=False)

    class Meta:
        table_name = "kddb_tag_metadata"
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
class Metadata(BaseModel):
    """Model stored in the ``kddb_metadata`` table: one optional binary payload per row."""

    id = AutoField()
    # NOTE(review): this module imports msgpack but never uses it here, so the
    # payload encoding cannot be determined from this file. Confirm in the caller.
    metadata = BlobField(null=True)

    class Meta:
        table_name = "kddb_metadata"
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
class Step(BaseModel):
    """Model stored in the ``kddb_steps`` table: a single required binary column."""

    obj = BlobField()

    class Meta:
        table_name = "kddb_steps"
        # The table has no dedicated primary-key column, so tell peewee not to
        # add its implicit ``id`` field.
        primary_key = False
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class ProcessingSteps(BaseModel):
    """Model stored in the ``kddb_processing_steps`` table."""

    # Auto-incrementing integer primary key.
    id = AutoField()
    # Required binary column (no ``null=True``).
    steps = BlobField()

    class Meta:
        table_name = "kddb_processing_steps"
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class Validations(BaseModel):
    """Model stored in the ``kddb_validations`` table."""

    # Auto-incrementing integer primary key.
    id = AutoField()
    # Required binary column (no ``null=True``).
    validations = BlobField()

    class Meta:
        table_name = "kddb_validations"
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class ExternalData(BaseModel):
    """Model stored in the ``kddb_external_data`` table.

    Holds a required text ``key`` and binary ``data``, optionally tied to a
    ``Taxonomy`` row.
    """

    id = AutoField()
    # Reachable from the taxonomy as ``external_data``.
    taxonomy = ForeignKeyField(
        Taxonomy,
        null=True,
        backref="external_data",
        column_name="taxonomy_id",
    )
    key = TextField()
    data = BlobField()

    class Meta:
        table_name = "kddb_external_data"
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def initialize_database(db_path):
    """Bind the shared database to ``db_path`` and bring its schema up to date.

    Opens a connection, creates whichever model tables are missing, and then
    upgrades a pre-existing ``kddb_content_exceptions`` table so that it
    matches the current ``ContentException`` model.

    Args:
        db_path: Database location passed straight to ``database.init``.
    """
    database.init(db_path)
    database.connect()

    all_tables = [
        Taxonomy,
        DataObject,
        NodeType,
        ContentNode,
        ContentNodePart,
        ContentException,
        FeatureType,
        Feature,
        ContentNodeFeatureLink,
        FeatureBlob,
        FeatureBBox,
        FeatureTag,
        DataAttribute,
        DataException,
        TagMetadata,
        Metadata,
        Step,
        ProcessingSteps,
        Validations,
        ExternalData,
    ]

    # Only missing tables are created; tables that already exist are left
    # exactly as they are.
    tables_to_create = [
        table
        for table in all_tables
        if not database.table_exists(table._meta.table_name)
    ]
    if tables_to_create:
        database.create_tables(tables_to_create)

    _migrate_content_exceptions()


def _migrate_content_exceptions():
    """Add missing columns to ``kddb_content_exceptions`` and make ``data_object_id`` nullable."""
    if not database.table_exists("kddb_content_exceptions"):
        return

    # Each PRAGMA table_info row carries the column name at index 1 and the
    # NOT NULL flag at index 3.
    cursor = database.execute_sql("PRAGMA table_info('kddb_content_exceptions')")
    not_null_flags = {row[1]: row[3] for row in cursor.fetchall()}

    if "node_uuid" not in not_null_flags:
        database.execute_sql(
            "ALTER TABLE kddb_content_exceptions ADD COLUMN node_uuid TEXT"
        )

    if "exception_type_id" not in not_null_flags:
        database.execute_sql(
            "ALTER TABLE kddb_content_exceptions ADD COLUMN exception_type_id TEXT"
        )

    # Adding columns does not touch the constraint on data_object_id, so the
    # flags read above are still valid for this check.
    if not_null_flags.get("data_object_id") != 1:
        return

    # The NOT NULL constraint is removed by rebuilding the table: create a
    # replacement, copy the rows, drop the old table, rename the new one.
    # Foreign keys are switched off outside the transaction while this runs.
    # NOTE(review): indexes on the old table are dropped with it and are not
    # recreated here - confirm whether one on data_object_id is needed.
    database.execute_sql("PRAGMA foreign_keys=off")
    try:
        # atomic() commits on success and rolls back if any statement raises,
        # so a failed rebuild cannot leave a half-open transaction behind.
        with database.atomic():
            database.execute_sql(
                """
                CREATE TABLE kddb_content_exceptions_new (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data_object_id INTEGER,
                    message TEXT,
                    exception_details TEXT,
                    exception_type TEXT,
                    severity TEXT,
                    path TEXT,
                    closing_comment TEXT,
                    open BOOLEAN DEFAULT 1,
                    node_uuid TEXT,
                    exception_type_id TEXT,
                    FOREIGN KEY (data_object_id) REFERENCES kddb_data_objects (id)
                )
                """
            )
            database.execute_sql(
                """
                INSERT INTO kddb_content_exceptions_new
                SELECT id, data_object_id, message, exception_details, exception_type,
                       severity, path, closing_comment, open, node_uuid, exception_type_id
                FROM kddb_content_exceptions
                """
            )
            database.execute_sql("DROP TABLE kddb_content_exceptions")
            database.execute_sql(
                "ALTER TABLE kddb_content_exceptions_new RENAME TO kddb_content_exceptions"
            )
    finally:
        # Re-enable foreign keys even when the rebuild failed; on success this
        # matches the state the function has always left the connection in.
        database.execute_sql("PRAGMA foreign_keys=on")
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def close_database():
    """Close the shared database connection; do nothing if it is already closed."""
    if database.is_closed():
        return
    database.close()
|