dbis-functional-dependencies 1.0.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/PKG-INFO +5 -4
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/pyproject.toml +4 -4
- dbis_functional_dependencies-1.0.2/src/dbis_functional_dependencies/fdcheck.py +166 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/PKG-INFO +5 -4
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/requires.txt +1 -2
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_fdfromdata.py +3 -4
- dbis_functional_dependencies-1.0.0/src/dbis_functional_dependencies/fdcheck.py +0 -113
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/LICENSE +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/README.md +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/setup.cfg +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies/BCNF.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies/__init__.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies/fds.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies/fdsbase.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/SOURCES.txt +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/dependency_links.txt +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/top_level.txt +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_bcnf.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_bitplan.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_cheung.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_dbis.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_fds.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_fdsbase.py +0 -0
- {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_tum.py +0 -0
@@ -1,11 +1,12 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: dbis-functional-dependencies
|
3
|
-
Version: 1.0.0
|
3
|
+
Version: 1.0.2
|
4
4
|
Summary: RWTH Aachen Computer Science i5/dbis assets for Lecture Datenbanken und Informationssysteme
|
5
5
|
Author-email: DBIS i5 RWTH Aachen <dbis-vl@dbis.rwth-aachen.de>
|
6
6
|
Project-URL: Homepage, https://git.rwth-aachen.de/i5/teaching/dbis/dbis-functional-dependencies
|
7
7
|
Classifier: Programming Language :: Python :: 3.10
|
8
8
|
Classifier: Programming Language :: Python :: 3.11
|
9
|
+
Classifier: Programming Language :: Python :: 3.13
|
9
10
|
Requires-Python: >=3.10
|
10
11
|
Description-Content-Type: text/markdown
|
11
12
|
License-File: LICENSE
|
@@ -16,12 +17,12 @@ Requires-Dist: sphinx~=7.2
|
|
16
17
|
Requires-Dist: sphinxcontrib-apidoc~=0.4
|
17
18
|
Requires-Dist: functional-dependencies~=1.3
|
18
19
|
Requires-Dist: build~=1.0
|
19
|
-
Requires-Dist: pylodstorage~=0.4.11
|
20
20
|
Provides-Extra: test
|
21
21
|
Requires-Dist: black==23.12.1; extra == "test"
|
22
22
|
Provides-Extra: build
|
23
|
-
Requires-Dist: twine==
|
23
|
+
Requires-Dist: twine==6.*; extra == "build"
|
24
24
|
Requires-Dist: build==1.*; extra == "build"
|
25
|
+
Dynamic: license-file
|
25
26
|
|
26
27
|
# DBIS Functional Dependencies
|
27
28
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name="dbis-functional-dependencies"
|
3
|
-
version='1.0.0'
|
3
|
+
version='1.0.2'
|
4
4
|
description="RWTH Aachen Computer Science i5/dbis assets for Lecture Datenbanken und Informationssysteme"
|
5
5
|
|
6
6
|
authors = [
|
@@ -10,7 +10,8 @@ readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
11
11
|
classifiers = [
|
12
12
|
"Programming Language :: Python :: 3.10",
|
13
|
-
"Programming Language :: Python :: 3.11"
|
13
|
+
"Programming Language :: Python :: 3.11",
|
14
|
+
"Programming Language :: Python :: 3.13"
|
14
15
|
]
|
15
16
|
|
16
17
|
dependencies = [
|
@@ -21,7 +22,6 @@ dependencies = [
|
|
21
22
|
"sphinxcontrib-apidoc~=0.4",
|
22
23
|
"functional-dependencies~=1.3",
|
23
24
|
"build~=1.0",
|
24
|
-
"pylodstorage~=0.4.11"
|
25
25
|
]
|
26
26
|
|
27
27
|
[project.optional-dependencies]
|
@@ -29,7 +29,7 @@ test = [
|
|
29
29
|
"black==23.12.1"
|
30
30
|
]
|
31
31
|
build = [
|
32
|
-
"twine==
|
32
|
+
"twine==6.*",
|
33
33
|
"build==1.*"
|
34
34
|
]
|
35
35
|
|
@@ -0,0 +1,166 @@
|
|
1
|
+
"""
|
2
|
+
Created on 2022-06-11
|
3
|
+
@author: wf
|
4
|
+
"""
|
5
|
+
import time
|
6
|
+
from dbis_functional_dependencies.BCNF import FunctionalDependencySet
|
7
|
+
import sqlite3
|
8
|
+
|
9
|
+
|
10
|
+
class FDCheck:
|
11
|
+
"""
|
12
|
+
check functional dependencies for a tabular dataset in list of dicts form
|
13
|
+
"""
|
14
|
+
|
15
|
+
def __init__(self, lod: list, debug: bool = False):
|
16
|
+
"""
|
17
|
+
construct me with the given list of dicts
|
18
|
+
|
19
|
+
Args:
|
20
|
+
lod(list): the list of dicts (table) to check
|
21
|
+
debug(bool): if true switch on debugging
|
22
|
+
"""
|
23
|
+
self.lod = lod
|
24
|
+
self.debug = debug
|
25
|
+
self.entityInfo = None
|
26
|
+
self.conn = None
|
27
|
+
|
28
|
+
def createDatabase(
|
29
|
+
self,
|
30
|
+
entityName,
|
31
|
+
primaryKey=None,
|
32
|
+
executeMany=True,
|
33
|
+
fixNone=False,
|
34
|
+
fixDates=False,
|
35
|
+
debug=False,
|
36
|
+
doClose=True,
|
37
|
+
):
|
38
|
+
"""
|
39
|
+
create a database for my list of Records
|
40
|
+
|
41
|
+
Args:
|
42
|
+
entityName(string): the name of the entity type to be used as a table name
|
43
|
+
primaryKey(string): the name of the key / column to be used as a primary key
|
44
|
+
executeMany(boolean): True if executeMany mode of sqlite3 should be used
|
45
|
+
fixNone(boolean): fix dict entries that are undefined to have a "None" entry
|
46
|
+
debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown
|
47
|
+
doClose(boolean): True if the connection should be closed
|
48
|
+
|
49
|
+
"""
|
50
|
+
size = len(self.lod)
|
51
|
+
if self.debug:
|
52
|
+
print(
|
53
|
+
"%s size is %d fixNone is %r fixDates is: %r"
|
54
|
+
% (entityName, size, fixNone, fixDates)
|
55
|
+
)
|
56
|
+
|
57
|
+
self.conn = sqlite3.connect(":memory:")
|
58
|
+
cursor = self.conn.cursor()
|
59
|
+
|
60
|
+
# Infer schema
|
61
|
+
keys = self.lod[0].keys()
|
62
|
+
columns = []
|
63
|
+
for k in keys:
|
64
|
+
value = self.lod[0][k]
|
65
|
+
sql_type = (
|
66
|
+
"INTEGER"
|
67
|
+
if isinstance(value, int)
|
68
|
+
else "REAL"
|
69
|
+
if isinstance(value, float)
|
70
|
+
else "TEXT"
|
71
|
+
)
|
72
|
+
col_def = f"{k} {sql_type}"
|
73
|
+
if k == primaryKey:
|
74
|
+
col_def += " PRIMARY KEY"
|
75
|
+
columns.append(col_def)
|
76
|
+
create_stmt = f"CREATE TABLE {entityName} ({', '.join(columns)});"
|
77
|
+
if debug:
|
78
|
+
print(create_stmt)
|
79
|
+
cursor.execute(create_stmt)
|
80
|
+
|
81
|
+
# Prepare data
|
82
|
+
col_names = list(keys)
|
83
|
+
placeholders = ", ".join(["?"] * len(col_names))
|
84
|
+
insert_stmt = (
|
85
|
+
f"INSERT INTO {entityName} ({', '.join(col_names)}) VALUES ({placeholders})"
|
86
|
+
)
|
87
|
+
|
88
|
+
values = []
|
89
|
+
for row in self.lod:
|
90
|
+
values.append(tuple(row.get(k, None) for k in col_names))
|
91
|
+
|
92
|
+
startTime = time.time()
|
93
|
+
if executeMany:
|
94
|
+
cursor.executemany(insert_stmt, values)
|
95
|
+
else:
|
96
|
+
for v in values:
|
97
|
+
cursor.execute(insert_stmt, v)
|
98
|
+
self.conn.commit()
|
99
|
+
|
100
|
+
elapsed = max(1e-12, time.time() - startTime)
|
101
|
+
if self.debug:
|
102
|
+
print(
|
103
|
+
f"adding {size} {entityName} records took {elapsed:.3f} s => {size/elapsed:.0f} records/s"
|
104
|
+
)
|
105
|
+
|
106
|
+
cursor.execute(f"SELECT * FROM {entityName}")
|
107
|
+
resultList = cursor.fetchall()
|
108
|
+
print(
|
109
|
+
f"selecting {len(resultList)} {entityName} records took {elapsed:.3f} s => {len(resultList)/elapsed:.0f} records/s"
|
110
|
+
)
|
111
|
+
|
112
|
+
self.entityInfo = {
|
113
|
+
"name": entityName,
|
114
|
+
"typeMap": {k: type(self.lod[0][k]) for k in keys},
|
115
|
+
"fields": list(keys),
|
116
|
+
}
|
117
|
+
|
118
|
+
if doClose:
|
119
|
+
cursor.close()
|
120
|
+
self.conn.close()
|
121
|
+
self.conn = None
|
122
|
+
|
123
|
+
return self.entityInfo
|
124
|
+
|
125
|
+
def findFDs(self):
|
126
|
+
"""
|
127
|
+
find functional dependencies
|
128
|
+
|
129
|
+
https://github.com/gustavclausen/functional-dependency-finder/blob/master/main.py
|
130
|
+
Return:
|
131
|
+
FunctionalDependencySet: the set of functional dependencies
|
132
|
+
"""
|
133
|
+
if self.entityInfo is None or not self.conn:
|
134
|
+
raise Exception("createDatabase must be called before findFDs")
|
135
|
+
|
136
|
+
table_name = self.entityInfo["name"]
|
137
|
+
fields = self.entityInfo["fields"]
|
138
|
+
|
139
|
+
fds = FunctionalDependencySet()
|
140
|
+
for i, field in enumerate(fields):
|
141
|
+
attr1_var = chr(ord("A") + i)
|
142
|
+
fds.add_attribute(attr1_var, field)
|
143
|
+
|
144
|
+
cursor = self.conn.cursor()
|
145
|
+
for i, field_1 in enumerate(fields):
|
146
|
+
attr1_var = chr(ord("A") + i)
|
147
|
+
for j, field_2 in enumerate(fields):
|
148
|
+
if i == j:
|
149
|
+
continue
|
150
|
+
attr2_var = chr(ord("A") + j)
|
151
|
+
sql = (
|
152
|
+
f"SELECT {field_1}, COUNT(DISTINCT {field_2}) as c\n"
|
153
|
+
f"FROM {table_name}\n"
|
154
|
+
f"GROUP BY {field_1}\n"
|
155
|
+
f"HAVING c > 1"
|
156
|
+
)
|
157
|
+
cursor.execute(sql)
|
158
|
+
hits = cursor.fetchall()
|
159
|
+
if self.debug:
|
160
|
+
print(f"{sql.strip()}\n{hits}")
|
161
|
+
if not hits:
|
162
|
+
fds.add_dependency(attr1_var, attr2_var)
|
163
|
+
|
164
|
+
cursor.close()
|
165
|
+
self.fds = fds
|
166
|
+
return fds
|
@@ -1,11 +1,12 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: dbis-functional-dependencies
|
3
|
-
Version: 1.0.0
|
3
|
+
Version: 1.0.2
|
4
4
|
Summary: RWTH Aachen Computer Science i5/dbis assets for Lecture Datenbanken und Informationssysteme
|
5
5
|
Author-email: DBIS i5 RWTH Aachen <dbis-vl@dbis.rwth-aachen.de>
|
6
6
|
Project-URL: Homepage, https://git.rwth-aachen.de/i5/teaching/dbis/dbis-functional-dependencies
|
7
7
|
Classifier: Programming Language :: Python :: 3.10
|
8
8
|
Classifier: Programming Language :: Python :: 3.11
|
9
|
+
Classifier: Programming Language :: Python :: 3.13
|
9
10
|
Requires-Python: >=3.10
|
10
11
|
Description-Content-Type: text/markdown
|
11
12
|
License-File: LICENSE
|
@@ -16,12 +17,12 @@ Requires-Dist: sphinx~=7.2
|
|
16
17
|
Requires-Dist: sphinxcontrib-apidoc~=0.4
|
17
18
|
Requires-Dist: functional-dependencies~=1.3
|
18
19
|
Requires-Dist: build~=1.0
|
19
|
-
Requires-Dist: pylodstorage~=0.4.11
|
20
20
|
Provides-Extra: test
|
21
21
|
Requires-Dist: black==23.12.1; extra == "test"
|
22
22
|
Provides-Extra: build
|
23
|
-
Requires-Dist: twine==
|
23
|
+
Requires-Dist: twine==6.*; extra == "build"
|
24
24
|
Requires-Dist: build==1.*; extra == "build"
|
25
|
+
Dynamic: license-file
|
25
26
|
|
26
27
|
# DBIS Functional Dependencies
|
27
28
|
|
{dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_fdfromdata.py
RENAMED
@@ -7,7 +7,6 @@ from pathlib import Path
|
|
7
7
|
import json
|
8
8
|
|
9
9
|
from tests.fdstest import FunctionalDependencySetTest
|
10
|
-
from lodstorage.jsonable import JSONAble
|
11
10
|
|
12
11
|
from dbis_functional_dependencies.fdcheck import FDCheck
|
13
12
|
|
@@ -33,8 +32,8 @@ class Test_FD_FromData(FunctionalDependencySetTest):
|
|
33
32
|
the jsonFileNme to read from
|
34
33
|
"""
|
35
34
|
jsonFilePath = f"{self.sampleDataPath}/{jsonFileName}"
|
36
|
-
|
37
|
-
|
35
|
+
with open(jsonFilePath, "r", encoding="utf-8") as f:
|
36
|
+
lod = json.load(f)
|
38
37
|
return lod
|
39
38
|
|
40
39
|
def testExamples(self):
|
@@ -56,7 +55,7 @@ class Test_FD_FromData(FunctionalDependencySetTest):
|
|
56
55
|
print(lod)
|
57
56
|
self.assertTrue(type(lod) is list)
|
58
57
|
expectedLen = expected[i]["len"]
|
59
|
-
self.
|
58
|
+
self.assertEqual(expectedLen, len(lod))
|
60
59
|
fdCheck = FDCheck(lod, debug=True)
|
61
60
|
entityName = example["entityName"]
|
62
61
|
fdCheck.createDatabase(entityName, doClose=False)
|
@@ -1,113 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Created on 2022-06-11
|
3
|
-
@author: wf
|
4
|
-
"""
|
5
|
-
import time
|
6
|
-
from dbis_functional_dependencies.BCNF import FunctionalDependencySet
|
7
|
-
from lodstorage.sql import SQLDB
|
8
|
-
|
9
|
-
|
10
|
-
class FDCheck:
|
11
|
-
"""
|
12
|
-
check functional dependencies for a tabular dataset in list of dicts form
|
13
|
-
"""
|
14
|
-
|
15
|
-
def __init__(self, lod: list, debug: bool = False):
|
16
|
-
"""
|
17
|
-
construct me with the given list of dicts
|
18
|
-
|
19
|
-
Args:
|
20
|
-
lod(list): the list of dicts (table) to check
|
21
|
-
debug(bool): if true switch on debugging
|
22
|
-
"""
|
23
|
-
self.lod = lod
|
24
|
-
self.debug = debug
|
25
|
-
self.entityInfo = None
|
26
|
-
|
27
|
-
def createDatabase(
|
28
|
-
self,
|
29
|
-
entityName,
|
30
|
-
primaryKey=None,
|
31
|
-
executeMany=True,
|
32
|
-
fixNone=False,
|
33
|
-
fixDates=False,
|
34
|
-
debug=False,
|
35
|
-
doClose=True,
|
36
|
-
):
|
37
|
-
"""
|
38
|
-
create a database for my list of Records
|
39
|
-
|
40
|
-
Args:
|
41
|
-
entityName(string): the name of the entity type to be used as a table name
|
42
|
-
primaryKey(string): the name of the key / column to be used as a primary key
|
43
|
-
executeMany(boolean): True if executeMany mode of sqlite3 should be used
|
44
|
-
fixNone(boolean): fix dict entries that are undefined to have a "None" entry
|
45
|
-
debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown
|
46
|
-
doClose(boolean): True if the connection should be closed
|
47
|
-
|
48
|
-
"""
|
49
|
-
size = len(self.lod)
|
50
|
-
if self.debug:
|
51
|
-
print(
|
52
|
-
"%s size is %d fixNone is %r fixDates is: %r"
|
53
|
-
% (entityName, size, fixNone, fixDates)
|
54
|
-
)
|
55
|
-
self.sqlDB = SQLDB(debug=debug, errorDebug=True)
|
56
|
-
entityInfo = self.sqlDB.createTable(self.lod, entityName, primaryKey)
|
57
|
-
startTime = time.time()
|
58
|
-
self.sqlDB.store(self.lod, entityInfo, executeMany=executeMany, fixNone=fixNone)
|
59
|
-
elapsed = (
|
60
|
-
0.000000000001 if time.time() - startTime == 0 else time.time() - startTime
|
61
|
-
)
|
62
|
-
if self.debug:
|
63
|
-
print(
|
64
|
-
"adding %d %s records took %5.3f s => %5.f records/s"
|
65
|
-
% (size, entityName, elapsed, size / elapsed)
|
66
|
-
)
|
67
|
-
if self.debug:
|
68
|
-
resultList = self.sqlDB.queryAll(entityInfo, fixDates=fixDates)
|
69
|
-
print(
|
70
|
-
"selecting %d %s records took %5.3f s => %5.f records/s"
|
71
|
-
% (len(resultList), entityName, elapsed, len(resultList) / elapsed)
|
72
|
-
)
|
73
|
-
if doClose:
|
74
|
-
self.sqlDB.close()
|
75
|
-
self.entityInfo = entityInfo
|
76
|
-
return entityInfo
|
77
|
-
|
78
|
-
def findFDs(self):
|
79
|
-
"""
|
80
|
-
find functional dependencies
|
81
|
-
|
82
|
-
https://github.com/gustavclausen/functional-dependency-finder/blob/master/main.py
|
83
|
-
Return:
|
84
|
-
FunctionalDependencySet: the set of functional dependencies
|
85
|
-
"""
|
86
|
-
if self.entityInfo is None:
|
87
|
-
raise Exception("createDataBase needed to supply entityInfo")
|
88
|
-
fields = list(self.entityInfo.typeMap.keys())
|
89
|
-
table_name = self.entityInfo.name
|
90
|
-
fds = FunctionalDependencySet()
|
91
|
-
for i, field in enumerate(fields):
|
92
|
-
attr1_var = chr(ord("A") + i)
|
93
|
-
fds.add_attribute(attr1_var, field)
|
94
|
-
for i, field in enumerate(fields):
|
95
|
-
attr1_var = chr(ord("A") + i)
|
96
|
-
for j in range(0, len(fields)):
|
97
|
-
if i == j:
|
98
|
-
continue
|
99
|
-
|
100
|
-
field_1 = fields[i]
|
101
|
-
field_2 = fields[j]
|
102
|
-
attr2_var = chr(ord("A") + j)
|
103
|
-
sql = f"SELECT {field_1}, COUNT(DISTINCT {field_2}) c FROM {table_name} GROUP BY {field_1} HAVING c > 1"
|
104
|
-
hits = self.sqlDB.query(sql)
|
105
|
-
if self.debug:
|
106
|
-
print(f"{sql}\n{hits}")
|
107
|
-
|
108
|
-
if len(hits) == 0:
|
109
|
-
# Functional dependency found: it's not the case that there's more than one value (field_2)
|
110
|
-
# associated with field_1
|
111
|
-
fds.add_dependency(attr1_var, attr2_var)
|
112
|
-
self.fds = fds
|
113
|
-
return fds
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_bcnf.py
RENAMED
File without changes
|
{dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_bitplan.py
RENAMED
File without changes
|
{dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_cheung.py
RENAMED
File without changes
|
{dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_dbis.py
RENAMED
File without changes
|
File without changes
|
{dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_fdsbase.py
RENAMED
File without changes
|
File without changes
|