dbis-functional-dependencies 1.0.0__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/PKG-INFO +5 -4
  2. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/pyproject.toml +4 -4
  3. dbis_functional_dependencies-1.0.2/src/dbis_functional_dependencies/fdcheck.py +166 -0
  4. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/PKG-INFO +5 -4
  5. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/requires.txt +1 -2
  6. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_fdfromdata.py +3 -4
  7. dbis_functional_dependencies-1.0.0/src/dbis_functional_dependencies/fdcheck.py +0 -113
  8. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/LICENSE +0 -0
  9. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/README.md +0 -0
  10. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/setup.cfg +0 -0
  11. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies/BCNF.py +0 -0
  12. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies/__init__.py +0 -0
  13. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies/fds.py +0 -0
  14. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies/fdsbase.py +0 -0
  15. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/SOURCES.txt +0 -0
  16. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/dependency_links.txt +0 -0
  17. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/src/dbis_functional_dependencies.egg-info/top_level.txt +0 -0
  18. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_bcnf.py +0 -0
  19. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_bitplan.py +0 -0
  20. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_cheung.py +0 -0
  21. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_dbis.py +0 -0
  22. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_fds.py +0 -0
  23. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_fdsbase.py +0 -0
  24. {dbis_functional_dependencies-1.0.0 → dbis_functional_dependencies-1.0.2}/tests/test_tum.py +0 -0
@@ -1,11 +1,12 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: dbis-functional-dependencies
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: RWTH Aachen Computer Science i5/dbis assets for Lecture Datenbanken und Informationssysteme
5
5
  Author-email: DBIS i5 RWTH Aachen <dbis-vl@dbis.rwth-aachen.de>
6
6
  Project-URL: Homepage, https://git.rwth-aachen.de/i5/teaching/dbis/dbis-functional-dependencies
7
7
  Classifier: Programming Language :: Python :: 3.10
8
8
  Classifier: Programming Language :: Python :: 3.11
9
+ Classifier: Programming Language :: Python :: 3.13
9
10
  Requires-Python: >=3.10
10
11
  Description-Content-Type: text/markdown
11
12
  License-File: LICENSE
@@ -16,12 +17,12 @@ Requires-Dist: sphinx~=7.2
16
17
  Requires-Dist: sphinxcontrib-apidoc~=0.4
17
18
  Requires-Dist: functional-dependencies~=1.3
18
19
  Requires-Dist: build~=1.0
19
- Requires-Dist: pylodstorage~=0.4.11
20
20
  Provides-Extra: test
21
21
  Requires-Dist: black==23.12.1; extra == "test"
22
22
  Provides-Extra: build
23
- Requires-Dist: twine==4.*; extra == "build"
23
+ Requires-Dist: twine==6.*; extra == "build"
24
24
  Requires-Dist: build==1.*; extra == "build"
25
+ Dynamic: license-file
25
26
 
26
27
  # DBIS Functional Dependencies
27
28
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name="dbis-functional-dependencies"
3
- version='1.0.0'
3
+ version='1.0.2'
4
4
  description="RWTH Aachen Computer Science i5/dbis assets for Lecture Datenbanken und Informationssysteme"
5
5
 
6
6
  authors = [
@@ -10,7 +10,8 @@ readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  classifiers = [
12
12
  "Programming Language :: Python :: 3.10",
13
- "Programming Language :: Python :: 3.11"
13
+ "Programming Language :: Python :: 3.11",
14
+ "Programming Language :: Python :: 3.13"
14
15
  ]
15
16
 
16
17
  dependencies = [
@@ -21,7 +22,6 @@ dependencies = [
21
22
  "sphinxcontrib-apidoc~=0.4",
22
23
  "functional-dependencies~=1.3",
23
24
  "build~=1.0",
24
- "pylodstorage~=0.4.11"
25
25
  ]
26
26
 
27
27
  [project.optional-dependencies]
@@ -29,7 +29,7 @@ test = [
29
29
  "black==23.12.1"
30
30
  ]
31
31
  build = [
32
- "twine==4.*",
32
+ "twine==6.*",
33
33
  "build==1.*"
34
34
  ]
35
35
 
@@ -0,0 +1,166 @@
1
+ """
2
+ Created on 2022-06-11
3
+ @author: wf
4
+ """
5
+ import time
6
+ from dbis_functional_dependencies.BCNF import FunctionalDependencySet
7
+ import sqlite3
8
+
9
+
10
class FDCheck:
    """
    check functional dependencies for a tabular dataset in list of dicts form

    createDatabase() loads the records into an in-memory SQLite table,
    findFDs() then probes every ordered pair of columns with a GROUP BY query.
    """

    def __init__(self, lod: list, debug: bool = False):
        """
        construct me with the given list of dicts

        Args:
            lod(list): the list of dicts (table) to check
            debug(bool): if true switch on debugging
        """
        self.lod = lod
        self.debug = debug
        # filled by createDatabase: dict with the keys "name", "typeMap" and "fields"
        self.entityInfo = None
        # open sqlite3 connection, None until createDatabase ran / after it closed
        self.conn = None

    @staticmethod
    def _quoteIdentifier(name) -> str:
        """
        quote a table or column name so that it is always parsed as an identifier

        Args:
            name: the raw name, e.g. a key of a record

        Return:
            str: the name in double quotes with embedded double quotes doubled
        """
        return '"' + str(name).replace('"', '""') + '"'

    @staticmethod
    def _sqlType(value) -> str:
        """
        derive the SQLite column type from a sample value

        Args:
            value: the sample value taken from the first record

        Return:
            str: "INTEGER" for int (incl. bool), "REAL" for float, otherwise "TEXT"
        """
        if isinstance(value, int):
            return "INTEGER"
        if isinstance(value, float):
            return "REAL"
        return "TEXT"

    def createDatabase(
        self,
        entityName,
        primaryKey=None,
        executeMany=True,
        fixNone=False,
        fixDates=False,
        debug=False,
        doClose=True,
    ):
        """
        create an in-memory database table for my list of records

        The schema (column names and types) is inferred from the first record
        only; keys that only appear in later records are not stored and keys
        missing in a later record are stored as NULL.

        Args:
            entityName(string): the name of the entity type to be used as a table name
            primaryKey(string): the name of the key / column to be used as a primary key
            executeMany(boolean): True if executeMany mode of sqlite3 should be used
            fixNone(boolean): kept for backward compatibility - missing entries are
                always stored as None, the flag is only echoed in the debug output
            fixDates(boolean): kept for backward compatibility - only echoed in the
                debug output
            debug(boolean): True if the CREATE TABLE command should be shown
            doClose(boolean): True if the connection should be closed; use False
                if findFDs is to be called afterwards

        Return:
            dict: the entity info with the keys "name", "typeMap" and "fields"

        Raises:
            IndexError: if my list of dicts is empty so that no schema can be inferred
        """
        size = len(self.lod)
        if size == 0:
            # IndexError is what the former self.lod[0] access raised - the type is
            # kept for callers but we fail before a connection is opened
            raise IndexError(
                f"can not create table {entityName}: the list of dicts is empty"
            )
        if self.debug:
            print(
                "%s size is %d fixNone is %r fixDates is: %r"
                % (entityName, size, fixNone, fixDates)
            )

        # do not leak the connection of an earlier createDatabase call
        if self.conn is not None:
            self.conn.close()
        self.conn = sqlite3.connect(":memory:")
        cursor = self.conn.cursor()

        # Infer schema from the first record
        firstRecord = self.lod[0]
        fields = list(firstRecord.keys())
        # quoted names keep keys with spaces or SQL keywords such as "order" usable
        tableName = self._quoteIdentifier(entityName)
        quotedFields = [self._quoteIdentifier(field) for field in fields]
        columns = []
        for field, quotedField in zip(fields, quotedFields):
            colDef = f"{quotedField} {self._sqlType(firstRecord[field])}"
            if field == primaryKey:
                colDef += " PRIMARY KEY"
            columns.append(colDef)
        createStmt = f"CREATE TABLE {tableName} ({', '.join(columns)});"
        if debug:
            print(createStmt)
        cursor.execute(createStmt)

        # Prepare data - values are bound as parameters, never formatted into the SQL
        placeholders = ", ".join(["?"] * len(fields))
        insertStmt = (
            f"INSERT INTO {tableName} ({', '.join(quotedFields)}) "
            f"VALUES ({placeholders})"
        )
        values = [tuple(row.get(field) for field in fields) for row in self.lod]

        startTime = time.perf_counter()
        if executeMany:
            cursor.executemany(insertStmt, values)
        else:
            for value in values:
                cursor.execute(insertStmt, value)
        self.conn.commit()
        # lower bound avoids a division by zero for the records/s figure
        elapsed = max(1e-12, time.perf_counter() - startTime)

        if self.debug:
            print(
                f"adding {size} {entityName} records took {elapsed:.3f} s => {size/elapsed:.0f} records/s"
            )
            # read everything back as a sanity check - timed on its own so that
            # the reported figure is the select time and not the insert time
            startTime = time.perf_counter()
            cursor.execute(f"SELECT * FROM {tableName}")
            resultList = cursor.fetchall()
            elapsed = max(1e-12, time.perf_counter() - startTime)
            print(
                f"selecting {len(resultList)} {entityName} records took {elapsed:.3f} s => {len(resultList)/elapsed:.0f} records/s"
            )

        self.entityInfo = {
            "name": entityName,
            "typeMap": {field: type(firstRecord[field]) for field in fields},
            "fields": fields,
        }

        cursor.close()
        if doClose:
            self.conn.close()
            self.conn = None

        return self.entityInfo

    def findFDs(self):
        """
        find functional dependencies

        For every ordered pair of columns (X, Y) the dependency X -> Y is
        recorded if no value of X is grouped with more than one distinct
        value of Y.

        https://github.com/gustavclausen/functional-dependency-finder/blob/master/main.py

        Return:
            FunctionalDependencySet: the set of functional dependencies

        Raises:
            Exception: if createDatabase has not been called or has already
                closed the connection (doClose=True is its default)
        """
        if self.entityInfo is None or self.conn is None:
            raise Exception(
                "createDatabase must be called with doClose=False before findFDs"
            )

        tableName = self._quoteIdentifier(self.entityInfo["name"])
        fields = self.entityInfo["fields"]
        quotedFields = [self._quoteIdentifier(field) for field in fields]
        # one variable per column: A, B, C, ...
        # NOTE(review): beyond 26 columns chr() leaves the letter range - confirm
        # that FunctionalDependencySet accepts such variable names
        attrVars = [chr(ord("A") + index) for index in range(len(fields))]

        fds = FunctionalDependencySet()
        for attrVar, field in zip(attrVars, fields):
            fds.add_attribute(attrVar, field)

        cursor = self.conn.cursor()
        try:
            for i, quoted1 in enumerate(quotedFields):
                for j, quoted2 in enumerate(quotedFields):
                    if i == j:
                        continue
                    # HAVING repeats the aggregate instead of referring to an alias
                    # so that a data column with the alias name can not interfere
                    sql = (
                        f"SELECT {quoted1}, COUNT(DISTINCT {quoted2}) AS distinct_count\n"
                        f"FROM {tableName}\n"
                        f"GROUP BY {quoted1}\n"
                        f"HAVING COUNT(DISTINCT {quoted2}) > 1"
                    )
                    cursor.execute(sql)
                    hits = cursor.fetchall()
                    if self.debug:
                        print(f"{sql.strip()}\n{hits}")
                    if not hits:
                        # no value of column i maps to more than one value of column j
                        fds.add_dependency(attrVars[i], attrVars[j])
        finally:
            cursor.close()

        self.fds = fds
        return fds
@@ -1,11 +1,12 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: dbis-functional-dependencies
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: RWTH Aachen Computer Science i5/dbis assets for Lecture Datenbanken und Informationssysteme
5
5
  Author-email: DBIS i5 RWTH Aachen <dbis-vl@dbis.rwth-aachen.de>
6
6
  Project-URL: Homepage, https://git.rwth-aachen.de/i5/teaching/dbis/dbis-functional-dependencies
7
7
  Classifier: Programming Language :: Python :: 3.10
8
8
  Classifier: Programming Language :: Python :: 3.11
9
+ Classifier: Programming Language :: Python :: 3.13
9
10
  Requires-Python: >=3.10
10
11
  Description-Content-Type: text/markdown
11
12
  License-File: LICENSE
@@ -16,12 +17,12 @@ Requires-Dist: sphinx~=7.2
16
17
  Requires-Dist: sphinxcontrib-apidoc~=0.4
17
18
  Requires-Dist: functional-dependencies~=1.3
18
19
  Requires-Dist: build~=1.0
19
- Requires-Dist: pylodstorage~=0.4.11
20
20
  Provides-Extra: test
21
21
  Requires-Dist: black==23.12.1; extra == "test"
22
22
  Provides-Extra: build
23
- Requires-Dist: twine==4.*; extra == "build"
23
+ Requires-Dist: twine==6.*; extra == "build"
24
24
  Requires-Dist: build==1.*; extra == "build"
25
+ Dynamic: license-file
25
26
 
26
27
  # DBIS Functional Dependencies
27
28
 
@@ -5,10 +5,9 @@ sphinx~=7.2
5
5
  sphinxcontrib-apidoc~=0.4
6
6
  functional-dependencies~=1.3
7
7
  build~=1.0
8
- pylodstorage~=0.4.11
9
8
 
10
9
  [build]
11
- twine==4.*
10
+ twine==6.*
12
11
  build==1.*
13
12
 
14
13
  [test]
@@ -7,7 +7,6 @@ from pathlib import Path
7
7
  import json
8
8
 
9
9
  from tests.fdstest import FunctionalDependencySetTest
10
- from lodstorage.jsonable import JSONAble
11
10
 
12
11
  from dbis_functional_dependencies.fdcheck import FDCheck
13
12
 
@@ -33,8 +32,8 @@ class Test_FD_FromData(FunctionalDependencySetTest):
33
32
  the jsonFileNme to read from
34
33
  """
35
34
  jsonFilePath = f"{self.sampleDataPath}/{jsonFileName}"
36
- jsonStr = JSONAble.readJsonFromFile(jsonFilePath)
37
- lod = json.loads(jsonStr)
35
+ with open(jsonFilePath, "r", encoding="utf-8") as f:
36
+ lod = json.load(f)
38
37
  return lod
39
38
 
40
39
  def testExamples(self):
@@ -56,7 +55,7 @@ class Test_FD_FromData(FunctionalDependencySetTest):
56
55
  print(lod)
57
56
  self.assertTrue(type(lod) is list)
58
57
  expectedLen = expected[i]["len"]
59
- self.assertEquals(expectedLen, len(lod))
58
+ self.assertEqual(expectedLen, len(lod))
60
59
  fdCheck = FDCheck(lod, debug=True)
61
60
  entityName = example["entityName"]
62
61
  fdCheck.createDatabase(entityName, doClose=False)
@@ -1,113 +0,0 @@
1
- """
2
- Created on 2022-06-11
3
- @author: wf
4
- """
5
- import time
6
- from dbis_functional_dependencies.BCNF import FunctionalDependencySet
7
- from lodstorage.sql import SQLDB
8
-
9
-
10
- class FDCheck:
11
- """
12
- check functional dependencies for a tabular dataset in list of dicts form
13
- """
14
-
15
- def __init__(self, lod: list, debug: bool = False):
16
- """
17
- construct me with the given list of dicts
18
-
19
- Args:
20
- lod(list): the list of dicts (table) to check
21
- debug(bool): if true switch on debugging
22
- """
23
- self.lod = lod
24
- self.debug = debug
25
- self.entityInfo = None
26
-
27
- def createDatabase(
28
- self,
29
- entityName,
30
- primaryKey=None,
31
- executeMany=True,
32
- fixNone=False,
33
- fixDates=False,
34
- debug=False,
35
- doClose=True,
36
- ):
37
- """
38
- create a database for my list of Records
39
-
40
- Args:
41
- entityName(string): the name of the entity type to be used as a table name
42
- primaryKey(string): the name of the key / column to be used as a primary key
43
- executeMany(boolean): True if executeMany mode of sqlite3 should be used
44
- fixNone(boolean): fix dict entries that are undefined to have a "None" entry
45
- debug(boolean): True if debug information e.g. CREATE TABLE and INSERT INTO commands should be shown
46
- doClose(boolean): True if the connection should be closed
47
-
48
- """
49
- size = len(self.lod)
50
- if self.debug:
51
- print(
52
- "%s size is %d fixNone is %r fixDates is: %r"
53
- % (entityName, size, fixNone, fixDates)
54
- )
55
- self.sqlDB = SQLDB(debug=debug, errorDebug=True)
56
- entityInfo = self.sqlDB.createTable(self.lod, entityName, primaryKey)
57
- startTime = time.time()
58
- self.sqlDB.store(self.lod, entityInfo, executeMany=executeMany, fixNone=fixNone)
59
- elapsed = (
60
- 0.000000000001 if time.time() - startTime == 0 else time.time() - startTime
61
- )
62
- if self.debug:
63
- print(
64
- "adding %d %s records took %5.3f s => %5.f records/s"
65
- % (size, entityName, elapsed, size / elapsed)
66
- )
67
- if self.debug:
68
- resultList = self.sqlDB.queryAll(entityInfo, fixDates=fixDates)
69
- print(
70
- "selecting %d %s records took %5.3f s => %5.f records/s"
71
- % (len(resultList), entityName, elapsed, len(resultList) / elapsed)
72
- )
73
- if doClose:
74
- self.sqlDB.close()
75
- self.entityInfo = entityInfo
76
- return entityInfo
77
-
78
- def findFDs(self):
79
- """
80
- find functional dependencies
81
-
82
- https://github.com/gustavclausen/functional-dependency-finder/blob/master/main.py
83
- Return:
84
- FunctionalDependencySet: the set of functional dependencies
85
- """
86
- if self.entityInfo is None:
87
- raise Exception("createDataBase needed to supply entityInfo")
88
- fields = list(self.entityInfo.typeMap.keys())
89
- table_name = self.entityInfo.name
90
- fds = FunctionalDependencySet()
91
- for i, field in enumerate(fields):
92
- attr1_var = chr(ord("A") + i)
93
- fds.add_attribute(attr1_var, field)
94
- for i, field in enumerate(fields):
95
- attr1_var = chr(ord("A") + i)
96
- for j in range(0, len(fields)):
97
- if i == j:
98
- continue
99
-
100
- field_1 = fields[i]
101
- field_2 = fields[j]
102
- attr2_var = chr(ord("A") + j)
103
- sql = f"SELECT {field_1}, COUNT(DISTINCT {field_2}) c FROM {table_name} GROUP BY {field_1} HAVING c > 1"
104
- hits = self.sqlDB.query(sql)
105
- if self.debug:
106
- print(f"{sql}\n{hits}")
107
-
108
- if len(hits) == 0:
109
- # Functional dependency found: it's not the case that there's more than one value (field_2)
110
- # associated with field_1
111
- fds.add_dependency(attr1_var, attr2_var)
112
- self.fds = fds
113
- return fds