jupyter-duckdb 0.9.2.2.dev202401171015__tar.gz → 1.4.106__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. {jupyter-duckdb-0.9.2.2.dev202401171015/src/jupyter_duckdb.egg-info → jupyter_duckdb-1.4.106}/PKG-INFO +64 -19
  2. jupyter-duckdb-0.9.2.2.dev202401171015/PKG-INFO → jupyter_duckdb-1.4.106/README.md +50 -33
  3. jupyter_duckdb-1.4.106/setup.py +54 -0
  4. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/db/Column.py +2 -1
  5. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/db/Connection.py +15 -1
  6. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/db/Table.py +9 -1
  7. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/db/__init__.py +2 -8
  8. jupyter_duckdb-1.4.106/src/duckdb_kernel/db/error/EmptyResultError.py +5 -0
  9. jupyter_duckdb-1.4.106/src/duckdb_kernel/db/error/__init__.py +1 -0
  10. {jupyter-duckdb-0.9.2.2.dev202401171015/src/duckdb_kernel/db → jupyter_duckdb-1.4.106/src/duckdb_kernel/db/implementation}/duckdb/Connection.py +49 -23
  11. jupyter_duckdb-1.4.106/src/duckdb_kernel/db/implementation/postgres/Connection.py +244 -0
  12. jupyter_duckdb-1.4.106/src/duckdb_kernel/db/implementation/postgres/util.py +14 -0
  13. {jupyter-duckdb-0.9.2.2.dev202401171015/src/duckdb_kernel/db → jupyter_duckdb-1.4.106/src/duckdb_kernel/db/implementation}/sqlite/Connection.py +22 -8
  14. jupyter_duckdb-1.4.106/src/duckdb_kernel/db/implementation/sqlite/__init__.py +1 -0
  15. jupyter_duckdb-1.4.106/src/duckdb_kernel/kernel.py +765 -0
  16. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/magics/MagicCommand.py +34 -10
  17. jupyter_duckdb-1.4.106/src/duckdb_kernel/magics/MagicCommandCallback.py +24 -0
  18. jupyter_duckdb-1.4.106/src/duckdb_kernel/magics/MagicCommandHandler.py +133 -0
  19. jupyter_duckdb-1.4.106/src/duckdb_kernel/magics/MagicState.py +11 -0
  20. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/magics/__init__.py +1 -0
  21. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/DCParser.py +10 -7
  22. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/LogicParser.py +6 -6
  23. jupyter_duckdb-1.4.106/src/duckdb_kernel/parser/ParserError.py +18 -0
  24. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/RAParser.py +23 -16
  25. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/__init__.py +1 -0
  26. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/DCOperand.py +7 -4
  27. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/RAElement.py +6 -3
  28. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/__init__.py +1 -1
  29. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/And.py +1 -1
  30. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/ConditionalSet.py +67 -19
  31. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Cross.py +1 -1
  32. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Difference.py +2 -2
  33. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Divide.py +1 -1
  34. jupyter_duckdb-1.4.106/src/duckdb_kernel/parser/elements/binary/Division.py +44 -0
  35. jupyter_duckdb-1.4.106/src/duckdb_kernel/parser/elements/binary/FullOuterJoin.py +37 -0
  36. jupyter_duckdb-1.4.106/src/duckdb_kernel/parser/elements/binary/LeftOuterJoin.py +24 -0
  37. jupyter_duckdb-1.4.106/src/duckdb_kernel/parser/elements/binary/LeftSemiJoin.py +24 -0
  38. jupyter_duckdb-1.4.106/src/duckdb_kernel/parser/elements/binary/RightOuterJoin.py +24 -0
  39. jupyter_duckdb-1.4.106/src/duckdb_kernel/parser/elements/binary/RightSemiJoin.py +24 -0
  40. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/__init__.py +22 -5
  41. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/tokenizer/Token.py +24 -3
  42. jupyter_duckdb-1.4.106/src/duckdb_kernel/parser/util/QuerySplitter.py +87 -0
  43. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/util/RenamableColumn.py +2 -2
  44. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/util/RenamableColumnList.py +22 -2
  45. jupyter_duckdb-1.4.106/src/duckdb_kernel/tests/__init__.py +76 -0
  46. jupyter_duckdb-1.4.106/src/duckdb_kernel/tests/test_dc.py +454 -0
  47. jupyter_duckdb-1.4.106/src/duckdb_kernel/tests/test_ra.py +1625 -0
  48. jupyter_duckdb-1.4.106/src/duckdb_kernel/tests/test_sql.py +48 -0
  49. jupyter_duckdb-1.4.106/src/duckdb_kernel/util/SQL.py +6 -0
  50. jupyter_duckdb-1.4.106/src/duckdb_kernel/util/TestError.py +4 -0
  51. jupyter_duckdb-1.4.106/src/duckdb_kernel/visualization/Plotly.py +144 -0
  52. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/visualization/RATreeDrawer.py +34 -2
  53. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/visualization/SchemaDrawer.py +3 -0
  54. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/visualization/__init__.py +1 -0
  55. jupyter_duckdb-1.4.106/src/duckdb_kernel/visualization/lib/__init__.py +53 -0
  56. jupyter_duckdb-1.4.106/src/duckdb_kernel/visualization/lib/plotly-3.0.1.min.js +3879 -0
  57. jupyter_duckdb-1.4.106/src/duckdb_kernel/visualization/lib/ra.css +3 -0
  58. jupyter_duckdb-1.4.106/src/duckdb_kernel/visualization/lib/ra.js +55 -0
  59. jupyter-duckdb-0.9.2.2.dev202401171015/README.md → jupyter_duckdb-1.4.106/src/jupyter_duckdb.egg-info/PKG-INFO +78 -15
  60. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/jupyter_duckdb.egg-info/SOURCES.txt +31 -8
  61. jupyter_duckdb-1.4.106/src/jupyter_duckdb.egg-info/requires.txt +4 -0
  62. jupyter-duckdb-0.9.2.2.dev202401171015/setup.py +0 -54
  63. jupyter-duckdb-0.9.2.2.dev202401171015/src/duckdb_kernel/kernel.py +0 -492
  64. jupyter-duckdb-0.9.2.2.dev202401171015/src/duckdb_kernel/magics/MagicCommandCallback.py +0 -20
  65. jupyter-duckdb-0.9.2.2.dev202401171015/src/duckdb_kernel/magics/MagicCommandHandler.py +0 -84
  66. jupyter-duckdb-0.9.2.2.dev202401171015/src/jupyter_duckdb.egg-info/requires.txt +0 -4
  67. jupyter-duckdb-0.9.2.2.dev202401171015/test/test_dc.py +0 -188
  68. jupyter-duckdb-0.9.2.2.dev202401171015/test/test_ra.py +0 -554
  69. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/setup.cfg +0 -0
  70. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/__init__.py +0 -0
  71. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/__main__.py +0 -0
  72. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/db/Constraint.py +0 -0
  73. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/db/DatabaseError.py +0 -0
  74. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/db/ForeignKey.py +0 -0
  75. {jupyter-duckdb-0.9.2.2.dev202401171015/src/duckdb_kernel/db → jupyter_duckdb-1.4.106/src/duckdb_kernel/db/implementation}/duckdb/__init__.py +0 -0
  76. {jupyter-duckdb-0.9.2.2.dev202401171015/src/duckdb_kernel/db/sqlite → jupyter_duckdb-1.4.106/src/duckdb_kernel/db/implementation/postgres}/__init__.py +0 -0
  77. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/kernel.json +0 -0
  78. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/magics/MagicCommandException.py +0 -0
  79. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/LogicElement.py +0 -0
  80. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/LogicOperand.py +0 -0
  81. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/LogicOperator.py +0 -0
  82. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/RABinaryOperator.py +0 -0
  83. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/RAOperand.py +0 -0
  84. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/RAOperator.py +0 -0
  85. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/RAUnaryOperator.py +0 -0
  86. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Add.py +0 -0
  87. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/ArrowLeft.py +0 -0
  88. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Equal.py +0 -0
  89. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/GreaterThan.py +0 -0
  90. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/GreaterThanEqual.py +0 -0
  91. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Intersection.py +0 -0
  92. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Join.py +0 -0
  93. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/LessThan.py +0 -0
  94. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/LessThanEqual.py +0 -0
  95. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Minus.py +0 -0
  96. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Multiply.py +0 -0
  97. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Or.py +0 -0
  98. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Unequal.py +0 -0
  99. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/binary/Union.py +0 -0
  100. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/unary/Not.py +0 -0
  101. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/unary/Projection.py +0 -0
  102. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/unary/Rename.py +0 -0
  103. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/unary/Selection.py +0 -0
  104. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/elements/unary/__init__.py +0 -0
  105. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/tokenizer/Tokenizer.py +0 -0
  106. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/tokenizer/__init__.py +0 -0
  107. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/parser/util/__init__.py +0 -0
  108. {jupyter-duckdb-0.9.2.2.dev202401171015/test → jupyter_duckdb-1.4.106/src/duckdb_kernel/tests}/test_result_comparison.py +0 -0
  109. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/util/ResultSetComparator.py +0 -0
  110. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/util/__init__.py +0 -0
  111. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/util/formatting.py +0 -0
  112. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/duckdb_kernel/visualization/Drawer.py +0 -0
  113. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/jupyter_duckdb.egg-info/dependency_links.txt +0 -0
  114. {jupyter-duckdb-0.9.2.2.dev202401171015 → jupyter_duckdb-1.4.106}/src/jupyter_duckdb.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: jupyter-duckdb
3
- Version: 0.9.2.2.dev202401171015
3
+ Version: 1.4.106
4
4
  Summary: a basic wrapper kernel for DuckDB
5
5
  Home-page: https://github.com/erictroebs/jupyter-duckdb
6
6
  Author: Eric Tröbs
@@ -12,9 +12,19 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.10
13
13
  Description-Content-Type: text/markdown
14
14
  Requires-Dist: jupyter
15
- Requires-Dist: graphviz==0.20.1
15
+ Requires-Dist: graphviz==0.21
16
16
  Requires-Dist: checkmarkandcross
17
- Requires-Dist: duckdb==0.9.2
17
+ Requires-Dist: duckdb==1.4.1
18
+ Dynamic: author
19
+ Dynamic: author-email
20
+ Dynamic: classifier
21
+ Dynamic: description
22
+ Dynamic: description-content-type
23
+ Dynamic: home-page
24
+ Dynamic: project-url
25
+ Dynamic: requires-dist
26
+ Dynamic: requires-python
27
+ Dynamic: summary
18
28
 
19
29
  # DuckDB Kernel for Jupyter
20
30
 
@@ -22,10 +32,6 @@ This is a simple DuckDB wrapper kernel which accepts SQL as input, executes it
22
32
  using a previously loaded DuckDB instance and formats the output as a table.
23
33
  There are some magic commands that make teaching easier with this kernel.
24
34
 
25
- ## Quick Start
26
-
27
- [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/git/https%3A%2F%2Fdbgit.prakinf.tu-ilmenau.de%2Fertr8623%2Fjupyter-duckdb.git/master)
28
-
29
35
  ## Table of Contents
30
36
 
31
37
  - [Setup](#setup)
@@ -39,6 +45,7 @@ There are some magic commands that make teaching easier with this kernel.
39
45
  - [Ship Tests With Your Notebook](#ship-tests-with-your-notebooks)
40
46
  - [Relational Algebra](#relational-algebra)
41
47
  - [Domain Calculus](#domain-calculus)
48
+ - [Automated Parser Selection](#automated-parser-selection)
42
49
 
43
50
  ## Setup
44
51
 
@@ -75,6 +82,12 @@ Execute the following command to pull and run a prepared image.
75
82
  docker run -p 8888:8888 troebs/jupyter-duckdb
76
83
  ```
77
84
 
85
+ There is also a second image. It contains an additional instance of PostgreSQL:
86
+
87
+ ```bash
88
+ docker run -p 8888:8888 troebs/jupyter-duckdb:postgresql
89
+ ```
90
+
78
91
  This image can also be used with JupyterHub and the
79
92
  [DockerSpawner / SwarmSpawner](https://github.com/jupyterhub/dockerspawner)
80
93
  and probably with the
@@ -106,28 +119,36 @@ another DuckDB file or a file with SQL statements. In the first case the
106
119
  included tables will be copied to the new database, while in the second case the
107
120
  SQL statements are just executed. We find this feature very useful to work in a
108
121
  temporary copy of the data and therefore be able to restart at any time. The
109
- last optional parameter `WITH_TESTS` is described in
110
- detail [below](#ship-tests-with-your-notebooks).
122
+ optional parameter `NAME` may be used to name a connection and reference it
123
+ later by using the magic command `USE`.
111
124
 
112
125
  ```
113
126
  %CREATE data.duckdb OF my_statements.sql
114
127
  ```
115
128
 
116
129
  `LOAD` on the other hand loads an existing database and returns an error if it
117
- does not exist. (That is why `OF` cannot be used with `LOAD`! `WITH_TESTS` on
118
- the other hand is available also with this magic command.)
130
+ does not exist. (That is why `OF` cannot be used with `LOAD`! `NAME` on the
131
+ other hand is also available with this magic command.)
119
132
 
120
133
  ```
121
134
  %LOAD data.duckdb
122
135
  ```
123
136
 
124
- Only one database can be open at any time. If a new database is created or
125
- loaded, the current one is closed first and saved to disk if necessary.
137
+ Multiple databases can be open at the same time. If a new database with the same
138
+ name is created or loaded, the current one is closed first and saved to disk
139
+ if necessary.
126
140
 
127
141
  Please note that `:memory:` is also a valid file path for DuckDB. The data is
128
142
  then stored exclusively in the main memory. In combination with `CREATE`
129
143
  and `OF` this makes it possible to work on a temporary copy in memory.
130
144
 
145
+ Although the name suggests otherwise, the kernel can also be used with other
146
+ databases:
147
+ - **SQLite** is automatically used as a fallback if the DuckDB dependency is
148
+ missing.
149
+ - To connect to a **PostgreSQL** instance, you need to specify a database URI
150
+ starting with `(postgresql|postgres|pgsql|psql|pg)://`.
151
+
131
152
  ### Schema Diagrams
132
153
 
133
154
  The magic command `SCHEMA` can be used to create a simple schema diagram of the
@@ -143,6 +164,10 @@ representation requires more space, but can improve readability.
143
164
  %SCHEMA TD
144
165
  ```
145
166
 
167
+ The optional argument `ONLY`, followed by one or more table names separated by a
168
+ comma, can be used to display only the named tables and all those connected with
169
+ a foreign key.
170
+
146
171
  Graphviz (`dot` in PATH) is required to render schema diagrams.
147
172
 
148
173
  ### Number of Rows
@@ -190,10 +215,10 @@ FROM bar
190
215
 
191
216
  ### Ship Tests With Your Notebooks
192
217
 
193
- Simple tests can be loaded together with the database with the help of
194
- the `WITH_TESTS` parameter. These tests are stored as a JSON file. Each test is
195
- assigned a unique name, a result set and whether the test should check the order
196
- of the result. A very simple test file looks like the following JSON object:
218
+ Simple tests can be loaded from JSON files with the help of the magic command
219
+ `LOAD_TESTS`. These tests are stored as a JSON file. Each test is assigned a
220
+ unique name, a result set and whether the test should check the order of the
221
+ result. A very simple test file looks like the following JSON object:
197
222
 
198
223
  ```json
199
224
  {
@@ -224,6 +249,11 @@ UNION
224
249
  SELECT 1, 'Name 1'
225
250
  ```
226
251
 
252
+ By default, failed tests will display an explanation, but the notebook will
253
+ continue to run. Set the `DUCKDB_TESTS_RAISE_EXCEPTION` environment variable to
254
+ `true` to raise an exception when a test fails. This can be useful for automated
255
+ testing in CI environments.
256
+
227
257
  Disclaimer: The integrated testing is work-in-progress and thus subject to
228
258
  potentially incompatible changes and enhancements.
229
259
 
@@ -234,7 +264,7 @@ magic command `RA` activates the relational algebra mode for a single cell:
234
264
 
235
265
  ```
236
266
  %RA
237
- π a, b (σ c = 1 (R))
267
+ π [a, b] (σ [c = 1] (R))
238
268
  ```
239
269
 
240
270
  The supported operations are:
@@ -247,6 +277,12 @@ The supported operations are:
247
277
  - Difference `\`
248
278
  - Natural Join `⋈`
249
279
  - Cross Product `×`
280
+ - Division `÷`
281
+
282
+ The optional flag `ANALYZE` can be used to add an execution diagram to the
283
+ output.
284
+
285
+ You can also add comments to queries using `--` or `/* */`, just like in SQL.
250
286
 
251
287
  The Dockerfile also installs the Jupyter Lab plugin
252
288
  [jupyter-ra-extension](https://pypi.org/project/jupyter-ra-extension/). It adds
@@ -262,3 +298,12 @@ magic command `DC` activates the domain calculus mode for a single cell:
262
298
  %DC
263
299
  { a, b | R(a, b, c) ∧ c = 1 }
264
300
  ```
301
+
302
+ ### Automated Parser Selection
303
+
304
+ `%ALL_RA` or `%ALL_DC` enables the corresponding parser for all subsequently
305
+ executed cells.
306
+
307
+ If the magic command `%AUTO_PARSER` is added to a cell, a parser is
308
+ automatically selected. If `%GUESS_PARSER` is executed, the parser is
309
+ automatically selected for all subsequent cells.
@@ -1,31 +1,9 @@
1
- Metadata-Version: 2.1
2
- Name: jupyter-duckdb
3
- Version: 0.9.2.2.dev202401171015
4
- Summary: a basic wrapper kernel for DuckDB
5
- Home-page: https://github.com/erictroebs/jupyter-duckdb
6
- Author: Eric Tröbs
7
- Author-email: eric.troebs@tu-ilmenau.de
8
- Project-URL: Bug Tracker, https://github.com/erictroebs/jupyter-duckdb/issues
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: License :: OSI Approved :: MIT License
11
- Classifier: Operating System :: OS Independent
12
- Requires-Python: >=3.10
13
- Description-Content-Type: text/markdown
14
- Requires-Dist: jupyter
15
- Requires-Dist: graphviz==0.20.1
16
- Requires-Dist: checkmarkandcross
17
- Requires-Dist: duckdb==0.9.2
18
-
19
1
  # DuckDB Kernel for Jupyter
20
2
 
21
3
  This is a simple DuckDB wrapper kernel which accepts SQL as input, executes it
22
4
  using a previously loaded DuckDB instance and formats the output as a table.
23
5
  There are some magic commands that make teaching easier with this kernel.
24
6
 
25
- ## Quick Start
26
-
27
- [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/git/https%3A%2F%2Fdbgit.prakinf.tu-ilmenau.de%2Fertr8623%2Fjupyter-duckdb.git/master)
28
-
29
7
  ## Table of Contents
30
8
 
31
9
  - [Setup](#setup)
@@ -39,6 +17,7 @@ There are some magic commands that make teaching easier with this kernel.
39
17
  - [Ship Tests With Your Notebook](#ship-tests-with-your-notebooks)
40
18
  - [Relational Algebra](#relational-algebra)
41
19
  - [Domain Calculus](#domain-calculus)
20
+ - [Automated Parser Selection](#automated-parser-selection)
42
21
 
43
22
  ## Setup
44
23
 
@@ -75,6 +54,12 @@ Execute the following command to pull and run a prepared image.
75
54
  docker run -p 8888:8888 troebs/jupyter-duckdb
76
55
  ```
77
56
 
57
+ There is also a second image. It contains an additional instance of PostgreSQL:
58
+
59
+ ```bash
60
+ docker run -p 8888:8888 troebs/jupyter-duckdb:postgresql
61
+ ```
62
+
78
63
  This image can also be used with JupyterHub and the
79
64
  [DockerSpawner / SwarmSpawner](https://github.com/jupyterhub/dockerspawner)
80
65
  and probably with the
@@ -106,28 +91,36 @@ another DuckDB file or a file with SQL statements. In the first case the
106
91
  included tables will be copied to the new database, while in the second case the
107
92
  SQL statements are just executed. We find this feature very useful to work in a
108
93
  temporary copy of the data and therefore be able to restart at any time. The
109
- last optional parameter `WITH_TESTS` is described in
110
- detail [below](#ship-tests-with-your-notebooks).
94
+ optional parameter `NAME` may be used to name a connection and reference it
95
+ later by using the magic command `USE`.
111
96
 
112
97
  ```
113
98
  %CREATE data.duckdb OF my_statements.sql
114
99
  ```
115
100
 
116
101
  `LOAD` on the other hand loads an existing database and returns an error if it
117
- does not exist. (That is why `OF` cannot be used with `LOAD`! `WITH_TESTS` on
118
- the other hand is available also with this magic command.)
102
+ does not exist. (That is why `OF` cannot be used with `LOAD`! `NAME` on the
103
+ other hand is also available with this magic command.)
119
104
 
120
105
  ```
121
106
  %LOAD data.duckdb
122
107
  ```
123
108
 
124
- Only one database can be open at any time. If a new database is created or
125
- loaded, the current one is closed first and saved to disk if necessary.
109
+ Multiple databases can be open at the same time. If a new database with the same
110
+ name is created or loaded, the current one is closed first and saved to disk
111
+ if necessary.
126
112
 
127
113
  Please note that `:memory:` is also a valid file path for DuckDB. The data is
128
114
  then stored exclusively in the main memory. In combination with `CREATE`
129
115
  and `OF` this makes it possible to work on a temporary copy in memory.
130
116
 
117
+ Although the name suggests otherwise, the kernel can also be used with other
118
+ databases:
119
+ - **SQLite** is automatically used as a fallback if the DuckDB dependency is
120
+ missing.
121
+ - To connect to a **PostgreSQL** instance, you need to specify a database URI
122
+ starting with `(postgresql|postgres|pgsql|psql|pg)://`.
123
+
131
124
  ### Schema Diagrams
132
125
 
133
126
  The magic command `SCHEMA` can be used to create a simple schema diagram of the
@@ -143,6 +136,10 @@ representation requires more space, but can improve readability.
143
136
  %SCHEMA TD
144
137
  ```
145
138
 
139
+ The optional argument `ONLY`, followed by one or more table names separated by a
140
+ comma, can be used to display only the named tables and all those connected with
141
+ a foreign key.
142
+
146
143
  Graphviz (`dot` in PATH) is required to render schema diagrams.
147
144
 
148
145
  ### Number of Rows
@@ -190,10 +187,10 @@ FROM bar
190
187
 
191
188
  ### Ship Tests With Your Notebooks
192
189
 
193
- Simple tests can be loaded together with the database with the help of
194
- the `WITH_TESTS` parameter. These tests are stored as a JSON file. Each test is
195
- assigned a unique name, a result set and whether the test should check the order
196
- of the result. A very simple test file looks like the following JSON object:
190
+ Simple tests can be loaded from JSON files with the help of the magic command
191
+ `LOAD_TESTS`. These tests are stored as a JSON file. Each test is assigned a
192
+ unique name, a result set and whether the test should check the order of the
193
+ result. A very simple test file looks like the following JSON object:
197
194
 
198
195
  ```json
199
196
  {
@@ -224,6 +221,11 @@ UNION
224
221
  SELECT 1, 'Name 1'
225
222
  ```
226
223
 
224
+ By default, failed tests will display an explanation, but the notebook will
225
+ continue to run. Set the `DUCKDB_TESTS_RAISE_EXCEPTION` environment variable to
226
+ `true` to raise an exception when a test fails. This can be useful for automated
227
+ testing in CI environments.
228
+
227
229
  Disclaimer: The integrated testing is work-in-progress and thus subject to
228
230
  potentially incompatible changes and enhancements.
229
231
 
@@ -234,7 +236,7 @@ magic command `RA` activates the relational algebra mode for a single cell:
234
236
 
235
237
  ```
236
238
  %RA
237
- π a, b (σ c = 1 (R))
239
+ π [a, b] (σ [c = 1] (R))
238
240
  ```
239
241
 
240
242
  The supported operations are:
@@ -247,6 +249,12 @@ The supported operations are:
247
249
  - Difference `\`
248
250
  - Natural Join `⋈`
249
251
  - Cross Product `×`
252
+ - Division `÷`
253
+
254
+ The optional flag `ANALYZE` can be used to add an execution diagram to the
255
+ output.
256
+
257
+ You can also add comments to queries using `--` or `/* */`, just like in SQL.
250
258
 
251
259
  The Dockerfile also installs the Jupyter Lab plugin
252
260
  [jupyter-ra-extension](https://pypi.org/project/jupyter-ra-extension/). It adds
@@ -262,3 +270,12 @@ magic command `DC` activates the domain calculus mode for a single cell:
262
270
  %DC
263
271
  { a, b | R(a, b, c) ∧ c = 1 }
264
272
  ```
273
+
274
+ ### Automated Parser Selection
275
+
276
+ `%ALL_RA` or `%ALL_DC` enables the corresponding parser for all subsequently
277
+ executed cells.
278
+
279
+ If the magic command `%AUTO_PARSER` is added to a cell, a parser is
280
+ automatically selected. If `%GUESS_PARSER` is executed, the parser is
281
+ automatically selected for all subsequent cells.
@@ -0,0 +1,54 @@
1
+ import os
2
+
3
+ # configuration
4
+ PACKAGE_VERSION = '1.4.106'
5
+ DUCKDB_VERSION = '1.4.1'
6
+
7
+ DEPENDENCIES = [
8
+ 'jupyter',
9
+ 'graphviz==0.21',
10
+ 'checkmarkandcross'
11
+ ]
12
+
13
+ if os.getenv('SQLITE') != '1' and os.getenv('DUCKDB') != '0':
14
+ DEPENDENCIES += [f'duckdb=={DUCKDB_VERSION}']
15
+
16
+ # main setup
17
+ if __name__ == '__main__':
18
+ from setuptools import setup, find_namespace_packages
19
+
20
+ # load README.md as long_description
21
+ with open('README.md', 'r', encoding='utf-8') as file:
22
+ long_description = file.read()
23
+
24
+ # main setup call
25
+ setup(
26
+ name='jupyter-duckdb',
27
+ version=PACKAGE_VERSION,
28
+ python_requires='>=3.10',
29
+ install_requires=DEPENDENCIES,
30
+ author='Eric Tröbs',
31
+ author_email='eric.troebs@tu-ilmenau.de',
32
+ description='a basic wrapper kernel for DuckDB',
33
+ long_description=long_description,
34
+ long_description_content_type='text/markdown',
35
+ url='https://github.com/erictroebs/jupyter-duckdb',
36
+ project_urls={
37
+ 'Bug Tracker': 'https://github.com/erictroebs/jupyter-duckdb/issues',
38
+ },
39
+ classifiers=[
40
+ 'Programming Language :: Python :: 3',
41
+ 'License :: OSI Approved :: MIT License',
42
+ 'Operating System :: OS Independent',
43
+ ],
44
+ package_dir={'': 'src'},
45
+ packages=find_namespace_packages(where='src'),
46
+ include_package_data=True,
47
+ package_data={
48
+ 'duckdb_kernel': [
49
+ 'kernel.json',
50
+ 'visualization/lib/*.css',
51
+ 'visualization/lib/*.js',
52
+ ]
53
+ }
54
+ )
@@ -4,10 +4,11 @@ from .Table import Table
4
4
 
5
5
 
6
6
  class Column:
7
- def __init__(self, table: Table, name: str, data_type: str):
7
+ def __init__(self, table: Table, name: str, data_type: str, null: bool):
8
8
  self.table: Table = table
9
9
  self.name: str = name
10
10
  self.data_type: str = data_type
11
+ self.null: bool = null
11
12
 
12
13
  def __hash__(self):
13
14
  return self.name.__hash__()
@@ -1,6 +1,6 @@
1
1
  from typing import Dict, List, Tuple, Any
2
2
 
3
- from . import Table
3
+ from .Table import Table
4
4
 
5
5
 
6
6
  class Connection:
@@ -10,6 +10,20 @@ class Connection:
10
10
  def close(self):
11
11
  pass
12
12
 
13
+ def copy(self) -> 'Connection':
14
+ raise NotImplementedError
15
+
16
+ @staticmethod
17
+ def plain_explain() -> bool:
18
+ return False
19
+
20
+ @staticmethod
21
+ def multiple_statements_per_query() -> bool:
22
+ return True
23
+
24
+ def __str__(self) -> str:
25
+ raise NotImplementedError
26
+
13
27
  def execute(self, query: str) -> Tuple[List[str], List[List[Any]]]:
14
28
  raise NotImplementedError
15
29
 
@@ -3,7 +3,7 @@ from typing import List, Optional
3
3
 
4
4
  from . import Column
5
5
  from . import ForeignKey
6
- from .Constraint import Constraint
6
+ from . import Constraint
7
7
 
8
8
 
9
9
  class Table:
@@ -14,11 +14,19 @@ class Table:
14
14
  self.unique_keys: List[Constraint] = []
15
15
  self.foreign_keys: List[ForeignKey] = []
16
16
 
17
+ @staticmethod
18
+ def normalize_name(name: str) -> str:
19
+ return name.lower()
20
+
17
21
  @property
18
22
  def id(self) -> str:
19
23
  name = re.sub(r'[^A-Za-z]', '_', self.name)
20
24
  return f'table_{name}'
21
25
 
26
+ @property
27
+ def normalized_name(self) -> str:
28
+ return self.normalize_name(self.name)
29
+
22
30
  def get_column(self, name: str) -> "Column":
23
31
  for column in self.columns:
24
32
  if column.name == name:
@@ -1,12 +1,6 @@
1
- from .DatabaseError import DatabaseError
2
1
  from .Column import Column
2
+ from .Connection import Connection
3
3
  from .Constraint import Constraint
4
+ from .DatabaseError import DatabaseError
4
5
  from .ForeignKey import ForeignKey
5
6
  from .Table import Table
6
-
7
- try:
8
- from .duckdb import Connection
9
- SQLITE_MODE = False
10
- except ImportError:
11
- from .sqlite import Connection
12
- SQLITE_MODE = True
@@ -0,0 +1,5 @@
1
+ from ..DatabaseError import DatabaseError
2
+
3
+
4
+ class EmptyResultError(DatabaseError):
5
+ pass
@@ -0,0 +1 @@
1
+ from .EmptyResultError import EmptyResultError
@@ -2,17 +2,29 @@ from typing import Dict, List, Tuple, Any
2
2
 
3
3
  import duckdb
4
4
 
5
- from .. import DatabaseError, Column, Constraint, ForeignKey, Table
6
- from ..Connection import Connection as Base
5
+ from ... import DatabaseError, Column, Constraint, ForeignKey, Table
6
+ from ...Connection import Connection as Base
7
+ from ...error import EmptyResultError
7
8
 
8
9
 
9
10
  class Connection(Base):
10
11
  def __init__(self, path: str):
12
+ self.path: str = path
11
13
  self.con: duckdb.DuckDBPyConnection = duckdb.connect(path, read_only=False)
12
14
 
13
15
  def close(self):
14
16
  self.con.close()
15
17
 
18
+ def copy(self) -> 'Connection':
19
+ return Connection(self.path)
20
+
21
+ @staticmethod
22
+ def plain_explain() -> bool:
23
+ return True
24
+
25
+ def __str__(self) -> str:
26
+ return f'DuckDB: {self.path}'
27
+
16
28
  def execute(self, query: str) -> Tuple[List[str], List[List[Any]]]:
17
29
  with self.con.cursor() as cursor:
18
30
  try:
@@ -28,7 +40,10 @@ class Connection(Base):
28
40
  raise e
29
41
 
30
42
  # get rows
31
- rows = cursor.fetchall()
43
+ try:
44
+ rows = cursor.fetchall()
45
+ except duckdb.InvalidInputException as e:
46
+ raise EmptyResultError(str(e))
32
47
 
33
48
  # get columns
34
49
  if cursor.description is None:
@@ -40,7 +55,7 @@ class Connection(Base):
40
55
 
41
56
  def analyze(self) -> Dict[str, Table]:
42
57
  tables: Dict[str, Table] = {}
43
- constraints: Dict[int, Constraint] = {}
58
+ constraints: Dict[Tuple, Constraint] = {}
44
59
 
45
60
  # Get table names first. In the columns table we can not filter
46
61
  # for base tables and some of the tables might not be contained
@@ -51,21 +66,23 @@ class Connection(Base):
51
66
  WHERE table_type == 'BASE TABLE'
52
67
  ''').fetchall():
53
68
  table = Table(table_name)
54
- tables[table_name] = table
69
+ tables[table.normalized_name] = table
55
70
 
56
71
  # Get column names and data types for each table.
57
- for table_name, column_name, data_type in self.con.execute('''
72
+ for table_name, column_name, data_type, is_nullable in self.con.execute('''
58
73
  SELECT
59
74
  table_name,
60
75
  column_name,
61
- data_type
76
+ data_type,
77
+ is_nullable
62
78
  FROM information_schema.columns
63
79
  ORDER BY ordinal_position ASC
64
80
  ''').fetchall():
65
- if table_name in tables:
66
- table = tables[table_name]
81
+ normalized_table_name = Table.normalize_name(table_name)
82
+ if normalized_table_name in tables:
83
+ table = tables[normalized_table_name]
67
84
 
68
- column = Column(table, column_name, data_type)
85
+ column = Column(table, column_name, data_type, is_nullable == 'YES')
69
86
  table.columns.append(column)
70
87
 
71
88
  # Find primary keys.
@@ -79,10 +96,12 @@ class Connection(Base):
79
96
  ORDER BY constraint_index ASC
80
97
  ''').fetchall():
81
98
  # get table
82
- if table_name not in tables:
99
+ normalized_table_name = Table.normalize_name(table_name)
100
+
101
+ if normalized_table_name not in tables:
83
102
  raise AssertionError(f'unknown table {table_name} for constraint {constraint_index}')
84
103
 
85
- table = tables[table_name]
104
+ table = tables[normalized_table_name]
86
105
 
87
106
  # store constraint
88
107
  if constraint_index in constraints:
@@ -93,7 +112,7 @@ class Connection(Base):
93
112
  table,
94
113
  tuple(table.get_column(c) for c in constraint_columns)
95
114
  )
96
- constraints[constraint_index] = constraint
115
+ constraints[(normalized_table_name, *constraint_columns)] = constraint
97
116
 
98
117
  # store key
99
118
  if table.primary_key is not None:
@@ -112,10 +131,12 @@ class Connection(Base):
112
131
  ORDER BY constraint_index ASC
113
132
  ''').fetchall():
114
133
  # get table
115
- if table_name not in tables:
134
+ normalized_table_name = Table.normalize_name(table_name)
135
+
136
+ if normalized_table_name not in tables:
116
137
  raise AssertionError(f'unknown table {table_name} for constraint {constraint_index}')
117
138
 
118
- table = tables[table_name]
139
+ table = tables[normalized_table_name]
119
140
 
120
141
  # store constraint
121
142
  if constraint_index in constraints:
@@ -126,32 +147,37 @@ class Connection(Base):
126
147
  table,
127
148
  tuple(table.get_column(c) for c in constraint_columns)
128
149
  )
129
- constraints[constraint_index] = constraint
150
+ constraints[(normalized_table_name, *constraint_columns)] = constraint
130
151
 
131
152
  # store key
132
153
  table.unique_keys.append(constraint)
133
154
 
134
155
  # Find foreign keys.
135
- for table_name, constraint_index, constraint_columns in self.con.execute('''
156
+ for table_name, constraint_index, constraint_columns, referenced_table, referenced_column_names, in self.con.execute('''
136
157
  SELECT
137
158
  table_name,
138
159
  constraint_index,
139
- constraint_column_names
160
+ constraint_column_names,
161
+ referenced_table,
162
+ referenced_column_names
140
163
  FROM duckdb_constraints()
141
164
  WHERE constraint_type = 'FOREIGN KEY'
142
165
  ORDER BY constraint_index ASC
143
166
  ''').fetchall():
144
167
  # get table
145
- if table_name not in tables:
168
+ normalized_table_name = Table.normalize_name(table_name)
169
+
170
+ if normalized_table_name not in tables:
146
171
  raise AssertionError(f'unknown table {table_name} for constraint {constraint_index}')
147
172
 
148
- table = tables[table_name]
173
+ table = tables[normalized_table_name]
149
174
 
150
175
  # lookup constraint
151
- if constraint_index not in constraints:
152
- raise AssertionError(f'constraint with index {constraint_index} not discovered previously')
176
+ constraint_key = (Table.normalize_name(referenced_table), *referenced_column_names)
177
+ if constraint_key not in constraints:
178
+ raise AssertionError(f'constraint with key {constraint_key} not discovered previously')
153
179
 
154
- constraint = constraints[constraint_index]
180
+ constraint = constraints[constraint_key]
155
181
 
156
182
  # store key
157
183
  key = ForeignKey(tuple(table.get_column(c) for c in constraint_columns), constraint)