dbis-functional-dependencies 0.0.8__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,96 +0,0 @@
1
- '''
2
- Created on 2022-06-11
3
- @author: wf
4
- '''
5
- import time
6
- from functional_dependencies.BCNF import FunctionalDependencySet
7
- from lodstorage.sql import SQLDB
8
-
9
class FDCheck:
    '''
    check functional dependencies for a tabular dataset in list of dicts form

    Typical use: construct with the list of dicts, call createDatabase to load
    the records into an SQL table, then call findFDs to derive the
    single-attribute functional dependencies from that table.
    '''

    def __init__(self, lod: list, debug: bool = False):
        '''
        construct me with the given list of dicts

        Args:
            lod(list): the list of dicts (table) to check
            debug(bool): if true switch on debugging
        '''
        self.lod = lod
        self.debug = debug
        # set by createDatabase; findFDs refuses to run while this is None
        self.entityInfo = None

    @staticmethod
    def _elapsedSince(startTime: float) -> float:
        '''
        get the seconds elapsed since startTime, never less than a tiny
        positive value so that callers can safely divide by the result
        '''
        return max(time.time() - startTime, 0.000000000001)

    def createDatabase(self, entityName, primaryKey=None, executeMany=True, fixNone=False, fixDates=False, debug=False, doClose=True):
        '''
        create a database for my list of Records

        Args:
            entityName(string): the name of the entity type to be used as a table name
            primaryKey(string): the name of the key / column to be used as a primary key
            executeMany(boolean): True if executeMany mode of sqlite3 should be used
            fixNone(boolean): fix dict entries that are undefined to have a "None" entry
            fixDates(boolean): passed to the read-back query that is only run when my debug flag is set
            debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown
            doClose(boolean): True if the connection should be closed

        Return:
            the entity info returned by SQLDB.createTable (also kept as self.entityInfo)

        Note:
            findFDs runs its queries via self.sqlDB, which is closed here when
            doClose is True - presumably doClose=False is needed if findFDs is
            to be called afterwards (verify against SQLDB.close semantics)
        '''
        size = len(self.lod)
        if self.debug:
            print("%s size is %d fixNone is %r fixDates is: %r" % (entityName, size, fixNone, fixDates))
        self.sqlDB = SQLDB(debug=debug, errorDebug=True)
        entityInfo = self.sqlDB.createTable(self.lod, entityName, primaryKey)
        startTime = time.time()
        self.sqlDB.store(self.lod, entityInfo, executeMany=executeMany, fixNone=fixNone)
        elapsed = self._elapsedSince(startTime)
        if self.debug:
            print("adding %d %s records took %5.3f s => %5.f records/s" % (size, entityName, elapsed, size / elapsed))
            # time the read-back separately - previously the insert duration
            # was reported again as the select duration
            startTime = time.time()
            resultList = self.sqlDB.queryAll(entityInfo, fixDates=fixDates)
            elapsed = self._elapsedSince(startTime)
            print("selecting %d %s records took %5.3f s => %5.f records/s" % (len(resultList), entityName, elapsed, len(resultList) / elapsed))
        if doClose:
            self.sqlDB.close()
        self.entityInfo = entityInfo
        return entityInfo

    def findFDs(self):
        '''
        find functional dependencies

        Each field of the table gets a one character variable name starting
        at 'A'. For every ordered pair of different fields a GROUP BY query
        checks whether any value of the first field occurs with more than one
        distinct value of the second field; if there is no such value the
        dependency first -> second is recorded.

        https://github.com/gustavclausen/functional-dependency-finder/blob/master/main.py

        Return:
            FunctionalDependencySet: the set of functional dependencies

        Raises:
            Exception: if createDatabase has not been called before
        '''
        if self.entityInfo is None:
            raise Exception("createDatabase needed to supply entityInfo")
        fields = list(self.entityInfo.typeMap.keys())
        table_name = self.entityInfo.name
        fds = FunctionalDependencySet()
        # consecutive characters starting at 'A'; with more than 26 fields
        # the names continue with the characters following 'Z'
        attr_vars = [chr(ord('A') + index) for index in range(len(fields))]
        for attr_var, field in zip(attr_vars, fields):
            fds.add_attribute(attr_var, field)
        for i, field_1 in enumerate(fields):
            for j, field_2 in enumerate(fields):
                if i == j:
                    continue
                # field and table names are interpolated unquoted, so they
                # have to be valid SQL identifiers
                sql = f'SELECT {field_1}, COUNT(DISTINCT {field_2}) c FROM {table_name} GROUP BY {field_1} HAVING c > 1'
                hits = self.sqlDB.query(sql)
                if self.debug:
                    print(f"{sql}\n{hits}")

                if len(hits) == 0:
                    # Functional dependency found: it's not the case that there's more than one value (field_2)
                    # associated with field_1
                    fds.add_dependency(attr_vars[i], attr_vars[j])
        self.fds = fds
        return fds
93
-
94
-
95
-
96
-
@@ -1,197 +0,0 @@
1
- '''
2
- Created on 2022-06-08
3
-
4
- @author: wf
5
- '''
6
- from enum import Enum
7
- import copy
8
-
9
class Notation(str, Enum):
    '''
    the notations available when converting sets, functional dependencies
    and relation schemas to strings

    Each member is also a str; its value is a human readable description
    of the notation.
    '''
    # LaTeX commands are emitted for braces, the empty set and the arrow
    math = "LaTex math symbols"
    # a Unicode arrow is emitted for functional dependencies
    utf8 = "UTF-8 Symbols"
    # braces, commas and "->" are emitted
    plain = "plaintext ASCII"
    # the elements are written next to each other without braces or commas
    short = "short notation without delimiters using juxtapositions"
17
-
18
class Set(set):
    '''
    a set whose string representation follows a configurable Notation

    https://docs.python.org/3/tutorial/datastructures.html#sets
    '''
    # class-wide notation used by __str__
    notation = Notation.plain

    def __init__(self, *args, **kwargs):
        '''
        constructor - accepts the same arguments as the builtin set
        '''
        super().__init__(*args, **kwargs)

    def __str__(self):
        '''
        convert me to a string using the class-wide Set.notation

        Return:
            str: a string representation of myself
        '''
        return Set.stringify_set(self, notation=Set.notation)

    @classmethod
    def stringify_set(cls, pSet, notation: Notation):
        '''
        return a string representation of the given set using the given notation

        The elements are converted with str() and sorted by that text so
        that the result does not depend on the iteration order of the set.

        Args:
            pSet(set): the set to stringify
            notation(Notation): the notation to use

        Return:
            str: the empty set symbol for an empty set, the concatenated
            elements for the short notation and otherwise the comma separated
            elements in (for math: escaped) curly braces
        '''
        sortedElements = sorted(str(elem) for elem in pSet)
        if not sortedElements:
            # empty set - raw string keeps the backslash of the LaTeX
            # command without relying on an invalid escape sequence
            return r"\emptyset" if notation == Notation.math else "∅"
        if notation == Notation.short:
            # juxtaposition without braces and delimiters
            # (a thin space would be better)
            return "".join(sortedElements)
        if notation == Notation.math:
            # braces have to be escaped in LaTeX
            opening, closing = r"\{", r"\}"
        else:
            opening, closing = "{", "}"
        return f"{opening}{','.join(sortedElements)}{closing}"
75
-
76
class FD(object):
    """A functional dependency with left- and right-hand side."""
    # class-wide notation used by __str__
    notation = Notation.plain

    def __init__(self, left, right):
        """Create FD with left hand side and right hand side

        Both sides are copied into new Set instances.

        Args:
            left(object): set of attributes for the left hand side
            right(object): set of attributes for the right hand side

        """
        self.left = Set(left)
        self.right = Set(right)

    def __str__(self):
        '''
        convert me to a string using the class-wide FD.notation

        Return:
            str: a string representation of myself
        '''
        return FD.stringify_FD(self, FD.notation)

    @classmethod
    def stringify_FD(cls, fd, notation: Notation):
        '''
        convert the given functional dependency to a string

        Both sides are always rendered in the short set notation; the
        notation argument only selects the arrow between them.

        Args:
            fd(FD): the functional dependency to stringify
            notation(Notation): the notation to use for the arrow

        Return:
            a string representation of the given Functional Dependency
        '''
        setNotation = Notation.short
        leftText = Set.stringify_set(fd.left, notation=setNotation)
        rightText = Set.stringify_set(fd.right, notation=setNotation)
        if notation == Notation.utf8:
            to = "→"
        elif notation == Notation.math:
            # raw string: in a normal literal "\t" is read as a TAB
            # character, which destroyed the LaTeX \to command
            to = r" \to "
        else:
            to = "->"
        return f"{leftText}{to}{rightText}"
118
-
119
class Attribute:
    '''
    an attribute consisting of a variable name, an english name and a german name

    Example: Attribute('A', 'Wikidata identifier', 'Wikidata-Schlüssel')
    '''

    def __init__(self, var_name: str, english_name: str, german_name: str):
        '''
        constructor

        Args:
            var_name(str): the Variable name
            english_name(str): the english name
            german_name(str): the german name
        '''
        self.var_name = var_name
        self.german_name = german_name
        self.english_name = english_name

    def __str__(self):
        '''
        convert me to a string

        Return:
            str: variable name, english name and german name joined by "≡"
        '''
        return "{}≡{}≡{}".format(self.var_name, self.english_name, self.german_name)
141
-
142
class RelSchema(object):
    """A relation schema consists of a set of attributes and a set of FDs.

    Various normal forms exist to describe "good" schemata. Normalization
    is the process of creating schemata that satisfy certain normal forms.
    The class of synthesis algorithms targets 3NF.
    """

    def __init__(self, attributes: Set, fds, name: str = "R", notation: Notation = None):
        """
        Construct relation schema with attributes and FDs.

        Args:
            attributes(Set): the attributes of the schema
            fds(FunctionalDependencySet): a set of FDs
            name(string): the name of the relation schema
            notation(Notation): the notation to use - Notation.utf8 if None is given
        """
        self.notation = Notation.utf8 if notation is None else notation
        self.attributes = attributes
        self.fds = fds
        self.name = name

    def __str__(self):
        '''
        convert me to a string using my own notation

        Return:
            str: a string representation of myself
        '''
        return RelSchema.stringify(self, self.notation)

    @classmethod
    def stringify(cls, rs, notation):
        '''
        return a textual representation of the given relational schema in the given notation

        Args:
            rs(RelSchema): the relation schema to stringify
            notation(Notation): the notation to use

        Return:
            str: the schema name followed by "=" and the stringified set
            made of the attribute text and the FD text
        '''
        attributesText = Set.stringify_set(rs.attributes, notation)
        # work on a deep copy so that setting the notation
        # does not modify the FDs of the given schema
        fdsCopy = copy.deepcopy(rs.fds)
        fdsCopy.notation = notation
        schemaParts = Set([attributesText, str(fdsCopy)])
        schemaText = Set.stringify_set(schemaParts, notation=notation)
        return f"{rs.name}={schemaText}"

    def findCandidateKeys(self):
        '''
        find candidate keys

        Each key found by my FDs is translated via the attribute_map
        of my FDs.

        Return:
            list: one Set of mapped attributes per candidate key
        '''
        return [
            Set(self.fds.attribute_map[keyVar] for keyVar in keyVars)
            for keyVars in self.fds.find_candidate_keys()
        ]
197
-