kodexa-document 7.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodexa-document might be problematic. Click here for more details.

@@ -0,0 +1,421 @@
1
+ from peewee import *
2
+ import json
3
+ import datetime
4
+ import msgpack
5
+ from playhouse.sqlite_ext import JSONField, BlobField
6
+
7
# Module-level peewee database handle used by every model in this file (they
# all inherit it through ``BaseModel.Meta``). It is constructed with ``None``
# instead of a path; ``initialize_database`` supplies the real path later by
# calling ``database.init``.
database = SqliteDatabase(None)  # Will be initialized later with actual DB path
8
+
9
+
10
class BaseModel(Model):
    """Common parent for every model in this module.

    Its only job is to attach the module-level ``database`` handle, so
    subclasses do not have to repeat it in their own ``Meta``.
    """

    class Meta:
        # Every subclass shares this (initially path-less) database handle.
        database = database
13
+
14
+
15
class Taxonomy(BaseModel):
    """Model for the ``kddb_taxonomy`` table: an id plus a required ``ref``."""

    id = AutoField()
    # Required text reference (no ``null=True``, so the column is NOT NULL).
    ref = TextField()

    class Meta:
        table_name = "kddb_taxonomy"
21
+
22
+
23
class DataObject(BaseModel):
    """Model for the ``kddb_data_objects`` table.

    Rows may nest through the nullable self-referential ``parent`` key
    (reverse accessor ``children``) and may reference a ``Taxonomy`` row
    (reverse accessor ``data_objects``).
    """

    id = AutoField()

    # --- optional relationships ------------------------------------------
    parent = ForeignKeyField(
        "self",
        column_name="parent_id",
        null=True,
        backref="children",
    )
    taxonomy = ForeignKeyField(
        Taxonomy,
        column_name="taxonomy_id",
        null=True,
        backref="data_objects",
    )

    # --- optional scalar columns -----------------------------------------
    idx = IntegerField(null=True)
    path = TextField(null=True)
    group_uuid = TextField(null=True)
    cell_index = IntegerField(null=True)
    source_ordering = TextField(null=True)

    # Both timestamps default to the time the Python instance is created;
    # nothing in this class refreshes ``modified`` on later saves.
    created = DateTimeField(default=datetime.datetime.now)
    modified = DateTimeField(default=datetime.datetime.now)

    lineage = JSONField(null=True)
    deleted = BooleanField(default=False)

    class Meta:
        table_name = "kddb_data_objects"
43
+
44
+
45
class NodeType(BaseModel):
    """Model for the ``kddb_node_types`` table: an id plus a required ``name``."""

    id = AutoField()
    # Required text column (NOT NULL).
    name = TextField()

    class Meta:
        table_name = "kddb_node_types"
51
+
52
+
53
class ContentNode(BaseModel):
    """Model for the ``kddb_content_nodes`` table.

    Nodes form a tree through the nullable self-referential ``parent`` key;
    a node's children are reachable through the ``children`` backref.
    """

    id = AutoField()
    parent = ForeignKeyField(
        "self",
        column_name="parent_id",
        null=True,
        backref="children",
    )

    # Stored as plain required text here, not as a foreign key to NodeType.
    node_type = TextField()
    content = TextField(null=True)

    # Both timestamps default to the time the Python instance is created.
    created = DateTimeField(default=datetime.datetime.now)
    modified = DateTimeField(default=datetime.datetime.now)

    index = IntegerField(null=True)

    class Meta:
        table_name = "kddb_content_nodes"
66
+
67
+
68
class ContentNodePart(BaseModel):
    """Model for the ``kddb_content_node_parts`` table.

    Each row belongs to exactly one ``ContentNode`` (required foreign key,
    reverse accessor ``parts``) and carries a required ``pos``.
    """

    id = AutoField()
    content_node = ForeignKeyField(
        ContentNode,
        column_name="content_node_id",
        backref="parts",
    )
    pos = IntegerField()

    # Both payload columns are optional.
    content = TextField(null=True)
    content_idx = IntegerField(null=True)

    class Meta:
        table_name = "kddb_content_node_parts"
79
+
80
+
81
class ContentException(BaseModel):
    """Model for the ``kddb_content_exceptions`` table.

    The link to ``DataObject`` is optional; ``open`` defaults to ``True``
    and every other column is nullable text.
    """

    id = AutoField()

    # Nullable on purpose: ``initialize_database`` rebuilds older tables in
    # which ``data_object_id`` still carried a NOT NULL constraint.
    data_object = ForeignKeyField(
        DataObject,
        column_name="data_object_id",
        null=True,
        backref="content_exceptions",
    )

    message = TextField(null=True)
    exception_details = TextField(null=True)
    exception_type = TextField(null=True)
    severity = TextField(null=True)
    path = TextField(null=True)
    closing_comment = TextField(null=True)
    open = BooleanField(default=True)

    # These two columns are also added by the migration in
    # ``initialize_database`` when an existing table lacks them.
    node_uuid = TextField(null=True)
    exception_type_id = TextField(null=True)

    class Meta:
        table_name = "kddb_content_exceptions"
101
+
102
+
103
class FeatureType(BaseModel):
    """Model for the ``kddb_feature_types`` table: an id plus a required ``name``."""

    id = AutoField()
    # Required text column (NOT NULL).
    name = TextField()

    class Meta:
        table_name = "kddb_feature_types"
109
+
110
+
111
class Feature(BaseModel):
    """Model for the ``kddb_features`` table.

    A feature row holds only its id and a required reference to a
    ``FeatureType`` (reverse accessor ``features``).
    """

    id = AutoField()
    feature_type = ForeignKeyField(
        FeatureType,
        column_name="feature_type_id",
        backref="features",
    )

    class Meta:
        table_name = "kddb_features"
119
+
120
+
121
class ContentNodeFeatureLink(BaseModel):
    """Model for the ``kddb_content_node_feature_links`` join table.

    Each row pairs one ``ContentNode`` with one ``Feature``; both foreign
    keys are required.
    """

    id = AutoField()
    content_node = ForeignKeyField(
        ContentNode,
        column_name="content_node_id",
        backref="feature_links",
    )
    feature = ForeignKeyField(
        Feature,
        column_name="feature_id",
        backref="content_node_links",
    )

    class Meta:
        table_name = "kddb_content_node_feature_links"
        # One composite index flagged unique (the trailing ``True``), so a
        # given (content node, feature) pair can be stored only once.
        indexes = ((("content_node_id", "feature_id"), True),)
135
+
136
+
137
class FeatureBlob(BaseModel):
    """Model for the ``kddb_feature_blob`` table.

    Attaches a required binary value to a ``Feature`` (reverse accessor
    ``blobs``).
    """

    id = AutoField()
    feature = ForeignKeyField(
        Feature,
        column_name="feature_id",
        backref="blobs",
    )
    binary_value = BlobField()

    class Meta:
        table_name = "kddb_feature_blob"
144
+
145
+
146
class FeatureBBox(BaseModel):
    """Model for the ``kddb_feature_bbox`` table.

    Attaches four required float coordinates to a ``Feature`` (reverse
    accessor ``bboxes``).
    """

    id = AutoField()
    feature = ForeignKeyField(
        Feature,
        column_name="feature_id",
        backref="bboxes",
    )

    # All four coordinates are required (NOT NULL) floats.
    x1 = FloatField()
    y1 = FloatField()
    x2 = FloatField()
    y2 = FloatField()

    class Meta:
        table_name = "kddb_feature_bbox"
156
+
157
+
158
class FeatureTag(BaseModel):
    """Model for the ``kddb_feature_tag`` table.

    Attaches tag details to a ``Feature`` (reverse accessor ``tags``).
    Apart from the required ``feature`` link, every column is nullable.
    """

    id = AutoField()
    feature = ForeignKeyField(
        Feature,
        column_name="feature_id",
        backref="tags",
    )

    tag_value = TextField(null=True)
    start_pos = IntegerField(null=True)
    end_pos = IntegerField(null=True)
    uuid = TextField(null=True)
    data = BlobField(null=True)
    confidence = FloatField(null=True)
    group_uuid = TextField(null=True)
    parent_group_uuid = TextField(null=True)
    cell_index = IntegerField(null=True)
    index = IntegerField(null=True)
    note = TextField(null=True)
    status = TextField(null=True)
    owner_uri = TextField(null=True)
    # Declared as a nullable integer here, whereas ``TagMetadata.is_dirty``
    # is a boolean defaulting to False.
    is_dirty = IntegerField(null=True)

    class Meta:
        table_name = "kddb_feature_tag"
178
+
179
+
180
class DataAttribute(BaseModel):
    """Model for the ``kddb_data_attributes`` table.

    Every attribute belongs to a ``DataObject`` (required, reverse accessor
    ``attributes``) and may optionally reference a ``FeatureTag``.
    """

    id = AutoField()

    # --- relationships ---------------------------------------------------
    data_object = ForeignKeyField(
        DataObject,
        column_name="data_object_id",
        backref="attributes",
    )
    feature_tag = ForeignKeyField(
        FeatureTag,
        column_name="feature_tag_id",
        null=True,
        backref="data_attributes",
    )

    # --- optional descriptive columns ------------------------------------
    tag = TextField(null=True)
    value = TextField(null=True)
    string_value = TextField(null=True)
    path = TextField(null=True)
    owner_uri = TextField(null=True)
    type_at_creation = TextField(null=True)
    decimal_value = FloatField(null=True)
    # Declared as a nullable integer, not a BooleanField.
    boolean_value = IntegerField(null=True)

    # Both timestamps default to the time the Python instance is created.
    created = DateTimeField(default=datetime.datetime.now)
    modified = DateTimeField(default=datetime.datetime.now)

    confidence = FloatField(null=True)
    truncated = BooleanField(default=False)

    class Meta:
        table_name = "kddb_data_attributes"
203
+
204
+
205
class DataException(BaseModel):
    """Model for the ``kddb_data_exceptions`` table.

    Each row is tied to a ``DataObject`` (required, reverse accessor
    ``data_exceptions``) and optionally to one ``DataAttribute`` (reverse
    accessor ``exceptions``). ``open`` defaults to ``True``.
    """

    id = AutoField()
    data_object = ForeignKeyField(
        DataObject,
        column_name="data_object_id",
        backref="data_exceptions",
    )
    data_attribute = ForeignKeyField(
        DataAttribute,
        column_name="data_attribute_id",
        null=True,
        backref="exceptions",
    )

    # All descriptive columns are optional text.
    message = TextField(null=True)
    exception_details = TextField(null=True)
    group_uuid = TextField(null=True)
    tag_uuid = TextField(null=True)
    exception_type = TextField(null=True)
    severity = TextField(null=True)
    path = TextField(null=True)
    closing_comment = TextField(null=True)

    open = BooleanField(default=True)

    class Meta:
        table_name = "kddb_data_exceptions"
225
+
226
+
227
class TagMetadata(BaseModel):
    """Model for the ``kddb_tag_metadata`` table.

    Both links (to ``DataObject`` and to ``DataAttribute``) are optional and
    share the reverse accessor name ``tag_metadata`` on their targets.
    """

    id = AutoField()
    data_object = ForeignKeyField(
        DataObject,
        column_name="data_object_id",
        null=True,
        backref="tag_metadata",
    )
    data_attribute = ForeignKeyField(
        DataAttribute,
        column_name="data_attribute_id",
        null=True,
        backref="tag_metadata",
    )

    uuid = TextField(null=True)
    group_uuid = TextField(null=True)
    parent_group_uuid = TextField(null=True)
    start_pos = IntegerField(null=True)
    end_pos = IntegerField(null=True)
    confidence = FloatField(null=True)
    note = TextField(null=True)
    status = TextField(null=True)
    owner_uri = TextField(null=True)
    # Boolean with a False default here; ``FeatureTag.is_dirty`` is instead
    # a nullable integer.
    is_dirty = BooleanField(default=False)

    class Meta:
        table_name = "kddb_tag_metadata"
251
+
252
+
253
class Metadata(BaseModel):
    """Model for the ``kddb_metadata`` table: an id plus an optional blob."""

    id = AutoField()
    # Optional binary payload; the encoding is not fixed by this model.
    metadata = BlobField(null=True)

    class Meta:
        table_name = "kddb_metadata"
259
+
260
+
261
class Step(BaseModel):
    """Model for the ``kddb_steps`` table, which holds one required blob column.

    Unlike the other models in this module it declares no ``id`` field and
    switches the primary key off in ``Meta``.
    """

    obj = BlobField()

    class Meta:
        table_name = "kddb_steps"
        # The table has no dedicated primary-key column.
        primary_key = False
267
+
268
+
269
class ProcessingSteps(BaseModel):
    """Model for the ``kddb_processing_steps`` table: an id plus a required blob."""

    id = AutoField()
    # Required binary payload; the encoding is not fixed by this model.
    steps = BlobField()

    class Meta:
        table_name = "kddb_processing_steps"
275
+
276
+
277
class Validations(BaseModel):
    """Model for the ``kddb_validations`` table: an id plus a required blob."""

    id = AutoField()
    # Required binary payload; the encoding is not fixed by this model.
    validations = BlobField()

    class Meta:
        table_name = "kddb_validations"
283
+
284
+
285
class ExternalData(BaseModel):
    """Model for the ``kddb_external_data`` table.

    Stores a required binary ``data`` value under a required text ``key``,
    optionally linked to a ``Taxonomy`` row (reverse accessor
    ``external_data``).
    """

    id = AutoField()
    taxonomy = ForeignKeyField(
        Taxonomy,
        column_name="taxonomy_id",
        null=True,
        backref="external_data",
    )
    key = TextField()
    data = BlobField()

    class Meta:
        table_name = "kddb_external_data"
295
+
296
+
297
def initialize_database(db_path):
    """Bind the module-level database to ``db_path`` and bring its schema up to date.

    Steps, in order:

    1. Point the shared ``database`` handle at ``db_path`` and open a
       connection.
    2. Create every model table that does not exist yet. Tables that already
       exist are left untouched.
    3. Upgrade an existing ``kddb_content_exceptions`` table (see
       ``_migrate_content_exceptions``).

    Args:
        db_path: Database location, passed unchanged to ``database.init``.

    Raises:
        Any error raised by peewee / SQLite while connecting, creating
        tables or migrating is propagated unchanged.
    """
    database.init(db_path)
    database.connect()

    all_tables = [
        Taxonomy,
        DataObject,
        NodeType,
        ContentNode,
        ContentNodePart,
        ContentException,
        FeatureType,
        Feature,
        ContentNodeFeatureLink,
        FeatureBlob,
        FeatureBBox,
        FeatureTag,
        DataAttribute,
        DataException,
        TagMetadata,
        Metadata,
        Step,
        ProcessingSteps,
        Validations,
        ExternalData,
    ]

    # Only missing tables are handed to create_tables, so existing tables
    # (and their indexes) are never touched by this step.
    tables_to_create = [
        table
        for table in all_tables
        if not database.table_exists(table._meta.table_name)
    ]
    if tables_to_create:
        database.create_tables(tables_to_create)

    _migrate_content_exceptions()


def _migrate_content_exceptions():
    """Upgrade an existing ``kddb_content_exceptions`` table in place.

    Adds the ``node_uuid`` and ``exception_type_id`` columns when they are
    missing, and rebuilds the table when ``data_object_id`` still carries a
    NOT NULL constraint (SQLite cannot drop that constraint with
    ``ALTER TABLE``).

    The rebuild runs inside ``database.atomic()``, so a failure rolls back
    and leaves the original table intact. The connection's ``foreign_keys``
    setting is put back to whatever it was before the rebuild, on success
    and on failure.
    """
    if not database.table_exists("kddb_content_exceptions"):
        return

    # PRAGMA table_info rows are (cid, name, type, notnull, dflt_value, pk).
    table_info = database.execute_sql(
        "PRAGMA table_info('kddb_content_exceptions')"
    ).fetchall()
    existing_columns = {row[1] for row in table_info}

    column_additions = (
        (
            "node_uuid",
            "ALTER TABLE kddb_content_exceptions ADD COLUMN node_uuid TEXT",
        ),
        (
            "exception_type_id",
            "ALTER TABLE kddb_content_exceptions ADD COLUMN exception_type_id TEXT",
        ),
    )
    for column_name, statement in column_additions:
        if column_name not in existing_columns:
            database.execute_sql(statement)

    # Adding columns does not change the notnull flag of data_object_id, so
    # the table_info rows fetched above are still valid for this check.
    needs_rebuild = any(
        row[1] == "data_object_id" and row[3] == 1 for row in table_info
    )
    if not needs_rebuild:
        return

    # Remember the current enforcement state so it can be restored exactly;
    # unconditionally switching it on afterwards would make enforcement
    # depend on whether this particular session happened to migrate.
    pragma_row = database.execute_sql("PRAGMA foreign_keys").fetchone()
    foreign_keys_were_on = bool(pragma_row and pragma_row[0])

    # Named explicitly in both the INSERT and the SELECT so the copy does not
    # depend on the physical column order of either table.
    copied_columns = (
        "id, data_object_id, message, exception_details, exception_type, "
        "severity, path, closing_comment, open, node_uuid, exception_type_id"
    )

    # PRAGMA foreign_keys is a no-op inside a transaction, so it has to be
    # changed before the transaction starts and restored after it ends.
    database.execute_sql("PRAGMA foreign_keys=off")
    try:
        with database.atomic():
            # NOTE(review): indexes that existed on the old table are dropped
            # with it and are not recreated here (same as before) - confirm
            # whether one on data_object_id is expected.
            database.execute_sql(
                """
                CREATE TABLE kddb_content_exceptions_new (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    data_object_id INTEGER,
                    message TEXT,
                    exception_details TEXT,
                    exception_type TEXT,
                    severity TEXT,
                    path TEXT,
                    closing_comment TEXT,
                    open BOOLEAN DEFAULT 1,
                    node_uuid TEXT,
                    exception_type_id TEXT,
                    FOREIGN KEY (data_object_id) REFERENCES kddb_data_objects (id)
                )
                """
            )
            database.execute_sql(
                "INSERT INTO kddb_content_exceptions_new ("
                + copied_columns
                + ") SELECT "
                + copied_columns
                + " FROM kddb_content_exceptions"
            )
            database.execute_sql("DROP TABLE kddb_content_exceptions")
            database.execute_sql(
                "ALTER TABLE kddb_content_exceptions_new "
                "RENAME TO kddb_content_exceptions"
            )
    finally:
        database.execute_sql(
            "PRAGMA foreign_keys=on"
            if foreign_keys_were_on
            else "PRAGMA foreign_keys=off"
        )
416
+
417
+
418
def close_database():
    """Close the shared database connection; do nothing if it is already closed."""
    if database.is_closed():
        return
    database.close()
@@ -0,0 +1,5 @@
1
+ """
2
+ Selectors allow you to work with a Kodexa document to find content
3
+ """
4
+
5
+ from .parser import parse