dbis-functional-dependencies 0.0.7__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,619 +0,0 @@
1
- '''
2
- Created on 25.05.2022
3
-
4
- @author: maxwellgerber
5
-
6
- see https://gist.github.com/maxwellgerber/4caae07161ea66123de4d6c374387786
7
-
8
- '''
9
- from itertools import combinations, permutations, chain
10
- import datetime
11
- from functional_dependencies.fdsbase import Attribute, Set, Notation, FD
12
-
13
-
14
class FunctionalDependencySet:
    '''
    a functional dependency set

    Holds the attributes of a relation scheme together with a list of
    functional dependencies (FDs). Each FD is stored as a tuple
    (left, right) of two sets, meaning left -> right.
    '''

    def __init__(self, attributes: str = "", title: str = "", description: str = "",
                 notation: "Notation" = None, debug: bool = False):
        '''
        constructor

        Args:
            attributes(str): attribute variable names of the scheme; the value is
                iterated, so every character of a string becomes one attribute
            title(str): a title for this functional Dependency Set
            description(str): a description for this functional Dependency Set
            notation(Notation): the notation to be used - Notation.utf8 if None
            debug(bool): if True switch debugging on
        '''
        self.title = title
        self.description = description
        self.notation = Notation.utf8 if notation is None else notation
        self.debug = debug
        self.isodate = datetime.datetime.now().isoformat()
        # list of FDs of the scheme. An FD is stored as a tuple (x, y), meaning x -> y
        self.dependencies = []
        # set of attributes of the scheme
        self.attributes = Set()
        # attribute variable name -> Attribute
        self.attribute_map = {}
        # switched to True by decompose() / decompose2()
        self.isdecomposed = False

        for attr in attributes:
            self.add_attribute(attr)

    def __str__(self):
        '''
        return my text representation
        '''
        return self.stringify_dependencies()

    def set_list_as_text_list(self, set_list: list, notation: "Notation"):
        '''
        convert a list of sets to a sorted list of strings using the given notation

        Args:
            set_list(list): list of sets
            notation(Notation): the notation to use

        Returns:
            list: of stringified sets in sorted order
        '''
        return sorted(Set.stringify_set(a_set, notation=notation) for a_set in set_list)

    def copy(self):
        '''
        create a copy of this functional dependency set

        The attribute set, the attribute map and the dependency list are
        copied shallowly: the containers are new, the contained attribute
        objects and FD tuples are shared with the original.

        Returns:
            FunctionalDependencySet: the copy
        '''
        fds = FunctionalDependencySet()
        fds.title = self.title
        fds.description = self.description
        fds.notation = self.notation
        fds.debug = self.debug
        fds.attributes = self.attributes.copy()
        fds.attribute_map = self.attribute_map.copy()
        fds.dependencies = self.dependencies.copy()
        fds.isdecomposed = self.isdecomposed
        # the copy used to be built and then dropped - hand it to the caller
        return fds

    def stringify_dependencies(self):
        '''
        stringifies the set of dependencies

        Returns:
            str: the comma separated dependencies enclosed in curly braces
        '''
        # only math and plain are passed through - everything else is rendered as utf8
        if self.notation in (Notation.math, Notation.plain):
            fd_notation = self.notation
        else:
            fd_notation = Notation.utf8
        fd_texts = [
            str(FD.stringify_FD(FD(left, right), fd_notation))
            for left, right in self.dependencies
        ]
        return "{" + ",".join(fd_texts) + "}"

    def add_attribute(self, attr_var: str, attr_english_name: str = None, attr_german_name: str = None):
        '''
        add attribute to the attribute set of the scheme

        Args:
            attr_var(string): attribute variable name to be added to the scheme
            attr_english_name(string): the name of the attribute in english
                - defaults to attr_var
            attr_german_name(string): the name of the attribute in german
                - defaults to the english name
        '''
        if attr_english_name is None:
            attr_english_name = attr_var
        if attr_german_name is None:
            attr_german_name = attr_english_name
        self.attributes.add(attr_var)
        self.attribute_map[attr_var] = Attribute(attr_var, attr_english_name, attr_german_name)

    def _check_attributes(self, pre, post):
        '''
        make sure all attributes of both sides of an FD belong to this scheme

        Raises:
            Exception: if an attribute is not part of the attributes of this scheme
        '''
        for attr in chain(pre, post):
            if attr not in self.attributes:
                raise Exception(f"Attribute {attr} does not exist")

    def add_dependency(self, pre, post):
        '''
        add dependency to the dependency list of the scheme

        Args:
            pre(set): attributes that initiate the FD (left of the arrow)
            post(set): attributes that are determined by the FD (right of the arrow)

        Raises:
            Exception: if an attribute is used that is not an attribute of this scheme
        '''
        self._check_attributes(pre, post)
        self.dependencies.append((set(pre), set(post)))

    def remove_dependency(self, pre, post):
        '''
        remove dependency from the dependency list of the scheme

        Args:
            pre(str): attributes that initiate the FD (left of the arrow)
            post(str): attributes that are determined by the FD (right of the arrow)

        Raises:
            Exception: if an attribute is used that is not an attribute of this scheme
            ValueError: if the dependency is not in the dependency list
        '''
        self._check_attributes(pre, post)
        self.dependencies.remove((set(pre), set(post)))

    @staticmethod
    def _closure(attrs, dependencies):
        '''
        calculate the closure of the given attributes under the given dependencies

        Args:
            attrs: iterable of attribute names
            dependencies(list): list of (left, right) tuples of sets

        Returns:
            set: the closure
        '''
        closure = set(attrs)
        # apply the FDs until a full pass does not add anything anymore
        grew = True
        while grew:
            grew = False
            for left, right in dependencies:
                if left.issubset(closure) and not closure.issuperset(right):
                    closure.update(right)
                    grew = True
        return closure

    def get_attr_closure(self, attr):
        '''
        get the closure of the given attribute(s)

        Args:
            attr(str): the attributes to calculate the closure for; any iterable
                of attribute names, a string is split into its characters

        Returns:
            set: the closure of the attribute(s)
        '''
        return self._closure(attr, self.dependencies)

    def attribute_combinations(self):
        '''
        generator for keys

        Yields all non empty attribute combinations as tuples, smaller
        combinations first. The attributes are sorted so that the order of
        the combinations is reproducible.
        '''
        attributes = sorted(self.attributes)
        for size in range(1, len(attributes) + 1):
            yield from combinations(attributes, size)

    def find_candidate_keys(self):
        '''
        find candidate keys of the scheme

        Returns:
            list: the candidate keys as sets - smaller keys first
        '''
        keys = []
        # combinations arrive by increasing size, so every proper subset of
        # a candidate has already been checked when the candidate is visited
        for candidate in self.attribute_combinations():
            if self.get_attr_closure(candidate) == self.attributes:
                key = set(candidate)
                # minimal only if no key found so far is contained in it
                if not any(known.issubset(key) for known in keys):
                    keys.append(key)
        return keys

    def isBCNF(self):
        '''
        tests whether an fdset is in BCNF: the left side of every non trivial
        dependency is a superkey

        Dependencies whose right side is contained in the left side are
        trivial and never violate BCNF.
        '''
        for left, right in self.dependencies:
            if right.issubset(left):
                continue
            if self.get_attr_closure(left) != self.attributes:
                return False
        return True

    def decompose2(self):
        '''
        decomposition algorithm according to DBIS-VL
        Source: https://dbis.rwth-aachen.de/dbis-vl/RelDesign#page=82

        Side effects: sets isdecomposed and stores the result in self.tables

        Returns:
            list: the attribute sets of the resulting relations
        '''
        self.isdecomposed = True
        # work list of (attributes, dependencies) still to be checked
        pending = [(self.attributes.copy(), self.dependencies.copy())]
        bcnf = []
        while pending:
            attributes, dependencies = pending.pop(0)
            # first non trivial FD whose left side is no superkey of the sub scheme
            violating = next(
                (
                    (left, right)
                    for left, right in dependencies
                    if not right.issubset(left)
                    and self._closure(left, dependencies) != attributes
                ),
                None,
            )
            if violating is None:
                bcnf.append(attributes)
                continue
            left, right = violating
            # create new sub schemes along the dependency we decompose with;
            # the left side has to stay in the second one as the join attributes
            attributes1 = left | right
            attributes2 = attributes - (right - left)
            # find dependencies that belong to the new sub schemes
            dependencies1 = [fd for fd in dependencies if (fd[0] | fd[1]) <= attributes1]
            dependencies2 = [fd for fd in dependencies if (fd[0] | fd[1]) <= attributes2]
            # additionally keep FDs whose right side is only partly contained
            dependencies2 += [
                (fd[0], fd[1] & attributes2)
                for fd in dependencies
                if fd[0] <= attributes2 and fd[1] & attributes2
            ]
            pending.append((attributes1, dependencies1))
            pending.append((attributes2, dependencies2))
        self.tables = bcnf
        return bcnf

    def decompose(self):
        '''
        decomposition algorithm

        Side effects: sets isdecomposed and stores the result in self.tables

        Returns:
            list: the attribute sets of the resulting tables
        '''
        self.isdecomposed = True
        # work on a copy - the tables are shrunk in place and the attributes
        # of the scheme must stay intact
        self.tables = [self.attributes.copy()]
        for left, right in self.dependencies:
            # attributes of the FD (equals the union when both sides are disjoint)
            newset = left.symmetric_difference(right)
            # tables appended below are visited by this loop as well
            for attr_set in self.tables:
                # if newset is real subset, extra attributes still exist
                # --> need to break it up
                if newset.issubset(attr_set) and newset != attr_set:
                    # split the right side of the FD off the attribute set
                    attr_set.difference_update(right)
                    # add new set to list of attribute sets
                    self.tables.append(newset)
        return self.tables

    def decompose_all(self):
        '''
        run the decomposition for every ordering of the dependencies

        Returns:
            set: the distinct decompositions, each one a tuple of tables,
                each table a sorted tuple of attributes
        '''
        tables_possibilities = set()
        for ordering in permutations(self.dependencies):
            tbl = [self.attributes.copy()]
            for left, right in ordering:
                newset = left.symmetric_difference(right)
                for attr_set in tbl:
                    if newset.issubset(attr_set) and newset != attr_set:
                        attr_set.difference_update(right)
                        tbl.append(newset)
            # sorted tuples make equal tables compare equal, so that
            # duplicates are reliably dropped by the set
            tables_possibilities.add(tuple(tuple(sorted(table)) for table in tbl))
        return tables_possibilities

    def is_lossy(self):
        '''
        check for lossyness

        Returns:
            bool: True if if one of my dependencies is not preserved

        Raises:
            Exception: if the scheme has not been decomposed yet
        '''
        if not self.isdecomposed:
            raise Exception("Can't tell if lossy if the FD hasn't been decomposed yet")
        return not all(self.is_preserved(dep) for dep in self.dependencies)

    def is_preserved(self, dep):
        '''
        check whether the given dependency is preserved

        Args:
            dep(tuple): the dependency (left, right) to check

        Returns:
            bool: True if the symmetric difference of both sides of the
                dependency is equal to one of the decomposed tables
        '''
        left, right = dep
        pre = left.symmetric_difference(right)
        return any(pre == attr_set for attr_set in self.tables)

    def calculate_fds_in_subset(self, subset):
        '''
        calculate all dependencies in a subset. Also includes dependencies for which
        attribute parts are missing because they are not in the subset. Does not include
        original dependencies that have lost all their attributes on either side

        Args:
            subset: container of the attributes to restrict the dependencies to

        Returns:
            list: the restricted dependencies as (left, right) tuples of sets
        '''
        subset_dependencies = []
        for left, right in self.dependencies:
            new_dep_pre = {attr for attr in left if attr in subset}
            new_dep_post = {attr for attr in right if attr in subset}
            # only add new dependency if none of both sides is empty
            if new_dep_pre and new_dep_post:
                subset_dependencies.append((new_dep_pre, new_dep_post))
        return subset_dependencies

    def is2NF(self):
        '''
        calculates whether the FD set is in 2NF: Every non key attribute has to
        depend on the whole candidate key.

        For every proper non empty subset of every candidate key the closure
        is checked: it must not contain an attribute that is not part of any
        candidate key.
        '''
        ckeys = self.find_candidate_keys()
        prime = set().union(*ckeys)
        non_prime = set(self.attributes) - prime
        for ckey in ckeys:
            # proper subsets only - a key with a single attribute has none
            for size in range(1, len(ckey)):
                for ckey_part in combinations(sorted(ckey), size):
                    if non_prime & self.get_attr_closure(ckey_part):
                        return False
        return True

    def is3NF(self):
        '''
        calculates whether the FD set is in 3NF: for every dependency the left
        side is a superkey or every attribute that is only on the right side
        is part of a candidate key
        '''
        ckeys = self.find_candidate_keys()
        prime = set().union(*ckeys)
        for left, right in self.dependencies:
            # attributes also contained in the left side are trivially determined
            if (right - left) <= prime:
                continue
            if self.get_attr_closure(left) != self.attributes:
                return False
        return True

    def generate_cluster(self, shape: str = 'box', indent: str = ' '):
        '''
        graphviz digraph subgraph (cluster) generation for this functional dependency set

        Args:
            shape(str): the shape to use - default: box
            indent(str): indentation unit prepended to the generated lines
        Return:
            str: graphviz markup
        '''
        # sort dependencies by largest pre
        dependencies = sorted(self.dependencies, key=lambda dep: len(dep[0]), reverse=True)

        # attributes that are not on the left side of any dependency
        only_post = self.attributes.copy()

        # one cluster per dependency holding the attributes of its left side
        cluster_lines = []
        for pre, _post in dependencies:
            only_post -= pre
            cluster_name = ''.join(sorted(pre))
            cluster_lines.append(f'{indent}subgraph cluster_{cluster_name}{{\n')
            cluster_lines.append(f'{indent} label="{cluster_name}"\n')
            for attr_var in sorted(pre):
                attr = self.attribute_map[attr_var]
                cluster_lines.append(f'{indent}{indent}{attr_var} [shape={shape} label="{attr}"]\n')
            cluster_lines.append(f'{indent}}}\n')

        # arrows start at the first (sorted) attribute of the left side
        arrow_lines = [
            f'{indent}{sorted(pre)[0]}->{attr_var}\n'
            for pre, post in dependencies
            for attr_var in sorted(post)
        ]

        # plain nodes for the attributes outside of all clusters
        only_post_lines = []
        for attr_var in sorted(only_post):
            attr = self.attribute_map[attr_var]
            only_post_lines.append(f'{indent}{attr_var} [shape={shape} label="{attr}"]\n')

        return ''.join(only_post_lines + cluster_lines + arrow_lines)

    def as_graphviz(self, withCluster: bool = True):
        '''
        convert me to a graphviz markup e.g. to try out in

        http://magjac.com/graphviz-visual-editor/
        or
        http://diagrams.bitplan.com

        Args:
            withCluster(bool): if True use the cluster markup of generate_cluster,
                else only declare the attribute variables as nodes

        Return:
            str: the graphviz markup for this functional DependencySet
        '''
        parts = [
            f"#generated by {__file__} on {self.isodate}\n",
            "digraph functionalDependencySet{",
            # add title see https://stackoverflow.com/a/6452088/1497139
            f'\n// title\nlabelloc="t";\nlabel="{self.title}"\n',
        ]
        if withCluster:
            parts.append(self.generate_cluster())
        else:
            parts.append("// Attribute variables \n")
            for attr_var in sorted(self.attributes):
                attr = self.attribute_map[attr_var]
                parts.append(f""" {attr_var} [ shape=box label="{attr}"] \n""")
        parts.append("}")
        return "".join(parts)

    def left_reduction(self):
        '''
        executes a left reduction on the dependencies from this fdset

        An attribute is dropped from the left side of a dependency if the
        right side is still in the closure of the remaining left side.
        '''
        # iterate a snapshot - the dependency list is modified on the way
        for pre, post in self.dependencies.copy():
            for attr in sorted(pre):
                reduced = pre - {attr}
                if post <= self.get_attr_closure(reduced):
                    self.remove_dependency(pre, post)
                    self.add_dependency(reduced, post)
                    pre = reduced

    def right_reduction(self):
        '''
        executes a right reduction on the dependencies from this fdset

        An attribute is dropped from the right side of a dependency if it is
        still in the closure of the left side without it. This may leave
        dependencies with an empty right side behind.
        '''
        # iterate a snapshot - the dependency list is modified on the way
        for pre, post in self.dependencies.copy():
            for attr in sorted(post):
                reduced = post - {attr}
                # try the dependency without the attribute ...
                self.remove_dependency(pre, post)
                self.add_dependency(pre, reduced)
                if {attr} <= set(self.get_attr_closure(pre)):
                    post = reduced
                else:
                    # ... and restore it if the attribute is not derivable anymore
                    self.remove_dependency(pre, reduced)
                    self.add_dependency(pre, post)

    def remove_empty_fds(self):
        '''
        remove empty fds of form "A → {}" from this fdset
        '''
        for pre, post in self.dependencies.copy():
            if len(post) == 0:
                self.remove_dependency(pre, post)

    def combine_fds(self):
        '''
        combines the dependencies of this fdset that have the same left side
        into a single dependency with the union of the right sides

        The order of the first occurrence of each left side is kept.
        '''
        combined = []
        for pre, post in self.dependencies:
            for entry in combined:
                if entry[0] == pre:
                    # build a new set - the right side of the original FD stays untouched
                    entry[1] = entry[1] | post
                    break
            else:
                combined.append([pre, post])
        self.dependencies = [(pre, post) for pre, post in combined]

    def canonical_cover(self):
        '''
        determines the canonical cover of this fdset

        4 substeps with respective functions - the dependencies of this
        fdset are modified in place

        https://git.rwth-aachen.de/i5/teaching/dbis-vl/-/raw/main/6-RelDesign/6-RelationaleEntwurfstheorie.pdf#page=39
        '''
        self.left_reduction()
        self.right_reduction()
        self.remove_empty_fds()
        self.combine_fds()

    def create_new_fdsets(self):
        '''
        create fdsets from the dependencies resulting from the canonical cover

        Return:
            list[FunctionalDependencySet]: list of fdsets created from the dependencies resulting from the canonical cover
        '''
        remaining = self.dependencies.copy()
        new_fdsets = []
        while remaining:
            seed = remaining.pop(0)
            pre, post = seed
            new_attributes = pre | post
            grouped = [seed]
            left_over = []
            # partition into two new lists instead of removing while iterating,
            # which used to skip the dependency following each absorbed one
            for dep in remaining:
                left, right = dep
                if left | right <= new_attributes:
                    grouped.append(dep)
                else:
                    left_over.append(dep)
            remaining = left_over
            fds = FunctionalDependencySet(new_attributes, 'R' + str(len(new_fdsets) + 1))
            for left, right in grouped:
                fds.add_dependency(left, right)
            new_fdsets.append(fds)
        return new_fdsets

    def synthesize(self):
        '''
        synthesize algorithm

        see https://git.rwth-aachen.de/i5/teaching/dbis-vl/-/raw/main/6-RelDesign/6-RelationaleEntwurfstheorie.pdf#page=76
        and Kemper page 197

        The dependencies of this fdset are replaced by their canonical cover.

        Return:
            list[FunctionalDependencySet]: list of synthesized fdsets deriving from this fdset
        '''
        # the keys are determined before the dependencies are modified
        keys = self.find_candidate_keys()
        self.canonical_cover()
        fdsets = self.create_new_fdsets()
        fdsets_with_key = self.create_optional_key_scheme(keys, fdsets)
        return self.remove_subset_relations(fdsets_with_key)

    def create_optional_key_scheme(self, keys, fdsets):
        '''
        creates a new fdset if key is not subset of any of the existing sets attributes

        Args:
            keys(list): the candidate keys of the original scheme
            fdsets(list): the fdsets created so far - extended in place

        Return:
            list[FunctionalDependencySet]: The list of fdsets with relation that has key candidate of original scheme
        '''
        # without any key there is nothing a key scheme could be built from
        if not keys:
            return fdsets
        if any(set(key) <= fds.attributes for key in keys for fds in fdsets):
            return fdsets
        key = set(keys[0])
        fds = FunctionalDependencySet(key, 'R' + str(len(fdsets) + 1))
        fds.add_dependency(key, key)
        fdsets.append(fds)
        return fdsets

    def remove_subset_relations(self, fdsets):
        '''
        removes fdsets with attributes that are a subset of another fdset

        Fdsets are told apart by their title. The given list is modified
        in place.

        Return:
            list[FunctionalDependencySet]: The reduced list of fdsets
        '''
        if self.debug:
            print(fdsets)
        # iterate a snapshot - the list itself is reduced on the way
        for fds in fdsets.copy():
            is_contained = any(
                fds.title != other.title and fds.attributes <= other.attributes
                for other in fdsets
            )
            if is_contained:
                fdsets.remove(fds)
        return fdsets
619
-