dbis-functional-dependencies 0.0.8__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {functional_dependencies → dbis_functional_dependencies}/BCNF.py +1761 -1380
- dbis_functional_dependencies/fdcheck.py +113 -0
- {functional_dependencies → dbis_functional_dependencies}/fds.py +14 -10
- dbis_functional_dependencies/fdsbase.py +207 -0
- dbis_functional_dependencies-1.0.0.dist-info/METADATA +178 -0
- dbis_functional_dependencies-1.0.0.dist-info/RECORD +10 -0
- {dbis_functional_dependencies-0.0.8.dist-info → dbis_functional_dependencies-1.0.0.dist-info}/WHEEL +1 -1
- dbis_functional_dependencies-1.0.0.dist-info/top_level.txt +1 -0
- dbis_functional_dependencies-0.0.8.dist-info/METADATA +0 -24
- dbis_functional_dependencies-0.0.8.dist-info/RECORD +0 -10
- dbis_functional_dependencies-0.0.8.dist-info/top_level.txt +0 -1
- functional_dependencies/fdcheck.py +0 -96
- functional_dependencies/fdsbase.py +0 -197
- {functional_dependencies → dbis_functional_dependencies}/__init__.py +0 -0
- {dbis_functional_dependencies-0.0.8.dist-info → dbis_functional_dependencies-1.0.0.dist-info}/LICENSE +0 -0
@@ -1,96 +0,0 @@
|
|
1
|
-
'''
|
2
|
-
Created on 2022-06-11
|
3
|
-
@author: wf
|
4
|
-
'''
|
5
|
-
import time
|
6
|
-
from functional_dependencies.BCNF import FunctionalDependencySet
|
7
|
-
from lodstorage.sql import SQLDB
|
8
|
-
|
9
|
-
class FDCheck:
    '''
    check functional dependencies for a tabular dataset in list of dicts form
    '''

    def __init__(self, lod: list, debug: bool = False):
        '''
        construct me with the given list of dicts

        Args:
            lod(list): the list of dicts (table) to check
            debug(bool): if true switch on debugging
        '''
        self.lod = lod
        self.debug = debug
        # filled by createDatabase
        self.entityInfo = None
        self.sqlDB = None
        # filled by findFDs
        self.fds = None

    @staticmethod
    def _elapsed_since(startTime: float) -> float:
        '''
        get the seconds elapsed since the given perf_counter reading

        Args:
            startTime(float): an earlier time.perf_counter() value

        Return:
            float: the elapsed seconds, never less than 1e-12 so that the
            result can safely be used as a divisor
        '''
        return max(time.perf_counter() - startTime, 0.000000000001)

    def createDatabase(self, entityName, primaryKey=None, executeMany=True, fixNone=False, fixDates=False, debug=False, doClose=True):
        '''
        create a database for my list of Records

        Args:
            entityName(string): the name of the entity type to be used as a table name
            primaryKey(string): the name of the key / column to be used as a primary key
            executeMany(boolean): True if executeMany mode of sqlite3 should be used
            fixNone(boolean): fix dict entries that are undefined to have a "None" entry
            fixDates(boolean): passed on to the select that is done in debug mode
            debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown
            doClose(boolean): True if the connection should be closed

        Return:
            the entity info returned by SQLDB.createTable (also kept in self.entityInfo)
        '''
        size = len(self.lod)
        if self.debug:
            print("%s size is %d fixNone is %r fixDates is: %r" % (entityName, size, fixNone, fixDates))
        self.sqlDB = SQLDB(debug=debug, errorDebug=True)
        entityInfo = self.sqlDB.createTable(self.lod, entityName, primaryKey)
        startTime = time.perf_counter()
        self.sqlDB.store(self.lod, entityInfo, executeMany=executeMany, fixNone=fixNone)
        elapsed = self._elapsed_since(startTime)
        if self.debug:
            print("adding %d %s records took %5.3f s => %5.f records/s" % (size, entityName, elapsed, size / elapsed))
            # time the select on its own instead of re-reporting the insert duration
            startTime = time.perf_counter()
            resultList = self.sqlDB.queryAll(entityInfo, fixDates=fixDates)
            elapsed = self._elapsed_since(startTime)
            print("selecting %d %s records took %5.3f s => %5.f records/s" % (len(resultList), entityName, elapsed, len(resultList) / elapsed))
        if doClose:
            self.sqlDB.close()
        self.entityInfo = entityInfo
        return entityInfo

    def findFDs(self):
        '''
        find functional dependencies

        Every ordered pair of distinct columns (X, Y) is checked with one
        GROUP BY query: X -> Y is recorded when no value of X is associated
        with more than one distinct value of Y.

        https://github.com/gustavclausen/functional-dependency-finder/blob/master/main.py

        NOTE(review): the queries run on self.sqlDB, which createDatabase
        closes by default (doClose=True) - presumably callers have to pass
        doClose=False before calling this method; confirm against SQLDB.

        Return:
            FunctionalDependencySet: the set of functional dependencies

        Raises:
            Exception: if createDatabase has not been called yet
        '''
        if self.entityInfo is None:
            raise Exception("createDatabase needed to supply entityInfo")
        fields = list(self.entityInfo.typeMap.keys())
        table_name = self.entityInfo.name
        # variable names are consecutive characters: first field 'A', second 'B', ...
        attr_vars = [chr(ord('A') + i) for i in range(len(fields))]
        fds = FunctionalDependencySet()
        for attr_var, field in zip(attr_vars, fields):
            fds.add_attribute(attr_var, field)
        for i, field_1 in enumerate(fields):
            for j, field_2 in enumerate(fields):
                if i == j:
                    continue
                sql = f'SELECT {field_1}, COUNT(DISTINCT {field_2}) c FROM {table_name} GROUP BY {field_1} HAVING c > 1'
                hits = self.sqlDB.query(sql)
                if self.debug:
                    print(f"{sql}\n{hits}")
                if len(hits) == 0:
                    # Functional dependency found: it's not the case that there's more than one value (field_2)
                    # associated with field_1
                    fds.add_dependency(attr_vars[i], attr_vars[j])
        self.fds = fds
        return fds
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
@@ -1,197 +0,0 @@
|
|
1
|
-
'''
|
2
|
-
Created on 2022-06-08
|
3
|
-
|
4
|
-
@author: wf
|
5
|
-
'''
|
6
|
-
from enum import Enum
|
7
|
-
import copy
|
8
|
-
|
9
|
-
class Notation(str, Enum):
    '''
    the notations available for stringification

    Each member is also a str; its value is a human readable description
    of the notation.
    '''

    # LaTeX math mode symbols
    math = "LaTex math symbols"

    # unicode symbols
    utf8 = "UTF-8 Symbols"

    # plain text
    plain = "plaintext ASCII"

    # elements are juxtaposed without any delimiters
    short = "short notation without delimiters using juxtapositions"
|
17
|
-
|
18
|
-
class Set(set):
    '''
    a set whose string representation lists its elements in sorted order
    using a selectable notation

    https://docs.python.org/3/tutorial/datastructures.html#sets
    '''
    # the notation __str__ uses - shared by all instances
    notation = Notation.plain

    def __init__(self, *args, **kwargs):
        '''
        constructor - all arguments are passed on to the builtin set
        '''
        super().__init__(*args, **kwargs)

    def __str__(self):
        '''
        convert me to a string using the class wide Set.notation

        Return:
            str: a string representation of myself
        '''
        return Set.stringify_set(self, notation=Set.notation)

    @classmethod
    def stringify_set(cls, pSet, notation: Notation):
        '''
        return a string representation of the given set using the given notation

        The elements are converted with str() and sorted by their text so
        that the result does not depend on the set's iteration order.

        Args:
            pSet(set): the set to stringify
            notation(Notation): the notation to use

        Return:
            str: the empty set symbol for an empty set, the juxtaposed
            elements for Notation.short, otherwise the comma separated
            elements in curly braces (LaTeX escaped for Notation.math)
        '''
        sortedElements = sorted(str(elem) for elem in pSet)
        if not sortedElements:
            # empty set
            # raw string: a backslash followed by 'e' is not a valid escape sequence
            return r"\emptyset" if notation == Notation.math else "∅"
        if notation == Notation.short:
            # no braces and no delimiter - a thin space would be better
            return "".join(sortedElements)
        elementsText = ",".join(sortedElements)
        if notation == Notation.math:
            # braces have to be escaped in LaTeX
            return r"\{" + elementsText + r"\}"
        return "{" + elementsText + "}"
|
75
|
-
|
76
|
-
class FD(object):
    """A functional dependency with left- and right-hand side."""
    # the notation __str__ uses - shared by all instances
    notation = Notation.plain

    def __init__(self, left, right):
        """Create FD with left hand side and right hand side

        Args:
            left(object): set of attributes for the left hand side
            right(object): set of attributes for the right hand side

        """
        self.left = Set(left)
        self.right = Set(right)

    def __str__(self):
        '''
        convert me to a string using the class wide FD.notation

        Return:
            str: a string representation of myself
        '''
        return FD.stringify_FD(self, FD.notation)

    @classmethod
    def stringify_FD(cls, fd, notation: Notation):
        '''
        convert the given functional dependency to text

        Both sides are always rendered in Notation.short; the given
        notation only selects the arrow between them.

        Args:
            fd(FD): the functional dependency to stringify
            notation(Notation): the notation to use for the arrow

        Return:
            a string representation of the given Functional Dependency
        '''
        setNotation = Notation.short
        leftText = Set.stringify_set(fd.left, notation=setNotation)
        rightText = Set.stringify_set(fd.right, notation=setNotation)
        if notation == Notation.utf8:
            to = "→"
        elif notation == Notation.math:
            # raw string: in a normal literal the backslash-t would be read
            # as a TAB character and the LaTeX arrow macro would be lost
            to = r" \to "
        else:
            to = "->"
        return f"{leftText}{to}{rightText}"
|
118
|
-
|
119
|
-
class Attribute:
    '''
    a named attribute consisting of a variable name, an english name
    and a german name

    Example: Attribute('A', 'Wikidata identifier', 'Wikidata-Schlüssel')
    '''

    def __init__(self, var_name: str, english_name: str, german_name: str):
        '''
        create the attribute

        Args:
            var_name(str): the Variable name
            english_name(str): the english name
            german_name(str): the german name
        '''
        self.var_name, self.english_name, self.german_name = (
            var_name,
            english_name,
            german_name,
        )

    def __str__(self):
        '''
        Return:
            str: variable name, english name and german name joined by "≡"
        '''
        return f"{self.var_name}≡{self.english_name}≡{self.german_name}"
|
141
|
-
|
142
|
-
class RelSchema(object):
    """A relation schema consists of a set of attributes and a set of FDs.

    Various normal forms exist to describe "good" schemata. Normalization
    is the process of creating schemata that satisfy certain normal forms.
    The class of synthesis algorithms targets 3NF.
    """

    def __init__(self, attributes: Set, fds, name: str = "R", notation: Notation = None):
        """
        Construct relation schema with attributes and FDs.

        Args:
            attributes(Set): the attributes of the schema
            fds(FunctionalDependencySet): a set of FDs
            name(string): the name of the RelationSchema
            notation(Notation): the notation to use - Notation.utf8 if None is given
        """
        self.notation = Notation.utf8 if notation is None else notation
        self.attributes = attributes
        self.fds = fds
        self.name = name

    def __str__(self):
        """
        Return:
            str: my textual representation in my own notation
        """
        return RelSchema.stringify(self, self.notation)

    @classmethod
    def stringify(cls, rs, notation):
        '''
        return a textual representation of the given relational schema in the given notation

        Args:
            rs(RelSchema): the relation schema to stringify
            notation(Notation): the notation to use

        Return:
            str: the schema name, "=" and the set made of the attribute text and the FDs text
        '''
        attributesText = Set.stringify_set(rs.attributes, notation)
        # the notation is set on a deep copy so that rs.fds is left untouched
        fdsCopy = copy.deepcopy(rs.fds)
        fdsCopy.notation = notation
        parts = Set([attributesText, str(fdsCopy)])
        partsText = Set.stringify_set(parts, notation=notation)
        return f"{rs.name}={partsText}"

    def findCandidateKeys(self):
        '''
        find candidate keys

        Return:
            list: one Set per candidate key found by my fds, holding the
            entries of fds.attribute_map looked up for the members of that key
        '''
        return [
            Set(self.fds.attribute_map[foundKey] for foundKey in foundKeySet)
            for foundKeySet in self.fds.find_candidate_keys()
        ]
|
197
|
-
|
File without changes
|
File without changes
|