real-ladybug 0.0.1.dev1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of real-ladybug might be problematic. Click here for more details.

Files changed (114)
  1. real_ladybug/__init__.py +83 -0
  2. real_ladybug/_lbug.cp311-win_amd64.pyd +0 -0
  3. real_ladybug/_lbug.exp +0 -0
  4. real_ladybug/_lbug.lib +0 -0
  5. real_ladybug/async_connection.py +226 -0
  6. real_ladybug/connection.py +323 -0
  7. real_ladybug/constants.py +7 -0
  8. real_ladybug/database.py +307 -0
  9. real_ladybug/prepared_statement.py +51 -0
  10. real_ladybug/py.typed +0 -0
  11. real_ladybug/query_result.py +511 -0
  12. real_ladybug/torch_geometric_feature_store.py +185 -0
  13. real_ladybug/torch_geometric_graph_store.py +131 -0
  14. real_ladybug/torch_geometric_result_converter.py +282 -0
  15. real_ladybug/types.py +39 -0
  16. real_ladybug-0.0.1.dev1.dist-info/METADATA +88 -0
  17. real_ladybug-0.0.1.dev1.dist-info/RECORD +114 -0
  18. real_ladybug-0.0.1.dev1.dist-info/WHEEL +5 -0
  19. real_ladybug-0.0.1.dev1.dist-info/licenses/LICENSE +21 -0
  20. real_ladybug-0.0.1.dev1.dist-info/top_level.txt +3 -0
  21. real_ladybug-0.0.1.dev1.dist-info/zip-safe +1 -0
  22. real_ladybug-source/scripts/antlr4/hash.py +2 -0
  23. real_ladybug-source/scripts/antlr4/keywordhandler.py +47 -0
  24. real_ladybug-source/scripts/collect-extensions.py +68 -0
  25. real_ladybug-source/scripts/collect-single-file-header.py +126 -0
  26. real_ladybug-source/scripts/export-dbs.py +101 -0
  27. real_ladybug-source/scripts/export-import-test.py +345 -0
  28. real_ladybug-source/scripts/extension/purge-beta.py +34 -0
  29. real_ladybug-source/scripts/generate-cpp-docs/collect_files.py +122 -0
  30. real_ladybug-source/scripts/generate-tinysnb.py +34 -0
  31. real_ladybug-source/scripts/get-clangd-diagnostics.py +233 -0
  32. real_ladybug-source/scripts/migrate-lbug-db.py +308 -0
  33. real_ladybug-source/scripts/multiplatform-test-helper/collect-results.py +71 -0
  34. real_ladybug-source/scripts/multiplatform-test-helper/notify-discord.py +68 -0
  35. real_ladybug-source/scripts/pip-package/package_tar.py +90 -0
  36. real_ladybug-source/scripts/pip-package/setup.py +130 -0
  37. real_ladybug-source/scripts/run-clang-format.py +408 -0
  38. real_ladybug-source/scripts/setup-extension-repo.py +67 -0
  39. real_ladybug-source/scripts/test-simsimd-dispatch.py +45 -0
  40. real_ladybug-source/scripts/update-nightly-build-version.py +81 -0
  41. real_ladybug-source/third_party/brotli/scripts/dictionary/step-01-download-rfc.py +16 -0
  42. real_ladybug-source/third_party/brotli/scripts/dictionary/step-02-rfc-to-bin.py +34 -0
  43. real_ladybug-source/third_party/brotli/scripts/dictionary/step-03-validate-bin.py +35 -0
  44. real_ladybug-source/third_party/brotli/scripts/dictionary/step-04-generate-java-literals.py +85 -0
  45. real_ladybug-source/third_party/pybind11/tools/codespell_ignore_lines_from_errors.py +35 -0
  46. real_ladybug-source/third_party/pybind11/tools/libsize.py +36 -0
  47. real_ladybug-source/third_party/pybind11/tools/make_changelog.py +63 -0
  48. real_ladybug-source/tools/python_api/build/real_ladybug/__init__.py +83 -0
  49. real_ladybug-source/tools/python_api/build/real_ladybug/async_connection.py +226 -0
  50. real_ladybug-source/tools/python_api/build/real_ladybug/connection.py +323 -0
  51. real_ladybug-source/tools/python_api/build/real_ladybug/constants.py +7 -0
  52. real_ladybug-source/tools/python_api/build/real_ladybug/database.py +307 -0
  53. real_ladybug-source/tools/python_api/build/real_ladybug/prepared_statement.py +51 -0
  54. real_ladybug-source/tools/python_api/build/real_ladybug/py.typed +0 -0
  55. real_ladybug-source/tools/python_api/build/real_ladybug/query_result.py +511 -0
  56. real_ladybug-source/tools/python_api/build/real_ladybug/torch_geometric_feature_store.py +185 -0
  57. real_ladybug-source/tools/python_api/build/real_ladybug/torch_geometric_graph_store.py +131 -0
  58. real_ladybug-source/tools/python_api/build/real_ladybug/torch_geometric_result_converter.py +282 -0
  59. real_ladybug-source/tools/python_api/build/real_ladybug/types.py +39 -0
  60. real_ladybug-source/tools/python_api/src_py/__init__.py +83 -0
  61. real_ladybug-source/tools/python_api/src_py/async_connection.py +226 -0
  62. real_ladybug-source/tools/python_api/src_py/connection.py +323 -0
  63. real_ladybug-source/tools/python_api/src_py/constants.py +7 -0
  64. real_ladybug-source/tools/python_api/src_py/database.py +307 -0
  65. real_ladybug-source/tools/python_api/src_py/prepared_statement.py +51 -0
  66. real_ladybug-source/tools/python_api/src_py/py.typed +0 -0
  67. real_ladybug-source/tools/python_api/src_py/query_result.py +511 -0
  68. real_ladybug-source/tools/python_api/src_py/torch_geometric_feature_store.py +185 -0
  69. real_ladybug-source/tools/python_api/src_py/torch_geometric_graph_store.py +131 -0
  70. real_ladybug-source/tools/python_api/src_py/torch_geometric_result_converter.py +282 -0
  71. real_ladybug-source/tools/python_api/src_py/types.py +39 -0
  72. real_ladybug-source/tools/python_api/test/conftest.py +230 -0
  73. real_ladybug-source/tools/python_api/test/disabled_test_extension.py +73 -0
  74. real_ladybug-source/tools/python_api/test/ground_truth.py +430 -0
  75. real_ladybug-source/tools/python_api/test/test_arrow.py +694 -0
  76. real_ladybug-source/tools/python_api/test/test_async_connection.py +159 -0
  77. real_ladybug-source/tools/python_api/test/test_blob_parameter.py +145 -0
  78. real_ladybug-source/tools/python_api/test/test_connection.py +49 -0
  79. real_ladybug-source/tools/python_api/test/test_database.py +234 -0
  80. real_ladybug-source/tools/python_api/test/test_datatype.py +372 -0
  81. real_ladybug-source/tools/python_api/test/test_df.py +564 -0
  82. real_ladybug-source/tools/python_api/test/test_dict.py +112 -0
  83. real_ladybug-source/tools/python_api/test/test_exception.py +54 -0
  84. real_ladybug-source/tools/python_api/test/test_fsm.py +227 -0
  85. real_ladybug-source/tools/python_api/test/test_get_header.py +49 -0
  86. real_ladybug-source/tools/python_api/test/test_helper.py +8 -0
  87. real_ladybug-source/tools/python_api/test/test_issue.py +147 -0
  88. real_ladybug-source/tools/python_api/test/test_iteration.py +96 -0
  89. real_ladybug-source/tools/python_api/test/test_networkx.py +437 -0
  90. real_ladybug-source/tools/python_api/test/test_parameter.py +340 -0
  91. real_ladybug-source/tools/python_api/test/test_prepared_statement.py +117 -0
  92. real_ladybug-source/tools/python_api/test/test_query_result.py +54 -0
  93. real_ladybug-source/tools/python_api/test/test_query_result_close.py +44 -0
  94. real_ladybug-source/tools/python_api/test/test_scan_pandas.py +676 -0
  95. real_ladybug-source/tools/python_api/test/test_scan_pandas_pyarrow.py +714 -0
  96. real_ladybug-source/tools/python_api/test/test_scan_polars.py +165 -0
  97. real_ladybug-source/tools/python_api/test/test_scan_pyarrow.py +167 -0
  98. real_ladybug-source/tools/python_api/test/test_timeout.py +11 -0
  99. real_ladybug-source/tools/python_api/test/test_torch_geometric.py +640 -0
  100. real_ladybug-source/tools/python_api/test/test_torch_geometric_remote_backend.py +111 -0
  101. real_ladybug-source/tools/python_api/test/test_udf.py +207 -0
  102. real_ladybug-source/tools/python_api/test/test_version.py +6 -0
  103. real_ladybug-source/tools/python_api/test/test_wal.py +80 -0
  104. real_ladybug-source/tools/python_api/test/type_aliases.py +10 -0
  105. real_ladybug-source/tools/rust_api/update_version.py +47 -0
  106. real_ladybug-source/tools/shell/test/conftest.py +218 -0
  107. real_ladybug-source/tools/shell/test/test_helper.py +60 -0
  108. real_ladybug-source/tools/shell/test/test_shell_basics.py +325 -0
  109. real_ladybug-source/tools/shell/test/test_shell_commands.py +656 -0
  110. real_ladybug-source/tools/shell/test/test_shell_control_edit.py +438 -0
  111. real_ladybug-source/tools/shell/test/test_shell_control_search.py +468 -0
  112. real_ladybug-source/tools/shell/test/test_shell_esc_edit.py +232 -0
  113. real_ladybug-source/tools/shell/test/test_shell_esc_search.py +162 -0
  114. real_ladybug-source/tools/shell/test/test_shell_flags.py +645 -0
@@ -0,0 +1,676 @@
1
+ import datetime
2
+ import re
3
+ from pathlib import Path
4
+ from uuid import UUID
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import pytest
9
+ from type_aliases import ConnDB
10
+
11
+ try:
12
+ from zoneinfo import ZoneInfo
13
+ except ImportError:
14
+ from backports.zoneinfo import ZoneInfo # type: ignore[no-redef]
15
+
16
+ import real_ladybug as lb
17
+ from real_ladybug.constants import ID, LABEL
18
+
19
+
20
def validate_scan_pandas_results(results: lb.QueryResult) -> None:
    """Assert that the next four rows read from ``results`` equal the expected fixture rows.

    Rows are consumed with ``get_next()`` in order; each one is compared as a
    whole list against the corresponding entry of the table below.
    """
    eastern = ZoneInfo("US/Eastern")
    first_and_last_uuid = UUID("d5a8ed71-6fc4-4cb3-acbc-2f5b73fd14bc")

    expected_rows = [
        [
            # bool, unsigned ints, signed ints
            True, 1, 10, 100, 1000, -1, -10, -100, -1000,
            # floats
            -0.5199999809265137, 5132.12321,
            # timestamps (naive, tz-aware, then three further naive columns)
            datetime.datetime(1996, 4, 1, 12, 0, 11, 500001),
            datetime.datetime(1996, 4, 1, 12, 0, 11, 500001, eastern),
            datetime.datetime(1996, 4, 1, 12, 0, 11, 500001),
            datetime.datetime(1996, 4, 1, 12, 0, 11, 500000),
            datetime.datetime(1996, 4, 1, 12, 0, 11),
            datetime.timedelta(microseconds=500),
            # name, worked_hours, int_object, float_object, used_names
            None, [], 528, 3.562, ["Alice", None],
            # past_date, mixed_type, uuid_type
            datetime.date(1996, 2, 15), "12331", first_and_last_uuid,
        ],
        [
            False, 2, 20, 200, 2000, -2, -20, -200, -2000,
            None, 24.222,
            datetime.datetime(1981, 11, 13, 22, 2, 52, 2),
            datetime.datetime(1981, 11, 13, 22, 2, 52, 2, eastern),
            datetime.datetime(1981, 11, 13, 22, 2, 52, 2),
            datetime.datetime(1981, 11, 13, 22, 2, 52),
            datetime.datetime(1981, 11, 13, 22, 2, 52),
            datetime.timedelta(seconds=1),
            "Ascii only", [40, 20, 10], -9999, 4.213, [],
            datetime.date(2013, 2, 22), "test string",
            UUID("9a2fc988-5c5d-4217-af9e-220aef5ce7b8"),
        ],
        [
            None, 3, 30, 300, 3000, -3, -30, -300, -3000,
            -3.299999952316284, None,
            datetime.datetime(1972, 12, 21, 12, 5, 44, 500003),
            datetime.datetime(1972, 12, 21, 12, 5, 44, 500003, eastern),
            datetime.datetime(1972, 12, 21, 12, 5, 44, 500003),
            datetime.datetime(1972, 12, 21, 12, 5, 44, 500000),
            datetime.datetime(1972, 12, 21, 12, 5, 44),
            datetime.timedelta(seconds=2, milliseconds=500),
            "ñ中国字", [30, None], None, None, None,
            datetime.date(2055, 1, 14), "5.623",
            UUID("166055ee-a481-4e67-a4fc-98682d3a3e20"),
        ],
        [
            False, 4, 40, 400, 4000, -4, -40, -400, -4000,
            4.400000095367432, 4.444,
            datetime.datetime(2008, 1, 11, 22, 10, 3, 4),
            datetime.datetime(2008, 1, 11, 22, 10, 3, 4, eastern),
            datetime.datetime(2008, 1, 11, 22, 10, 3, 4),
            datetime.datetime(2008, 1, 11, 22, 10, 3),
            datetime.datetime(2008, 1, 11, 22, 10, 3),
            datetime.timedelta(seconds=3, milliseconds=22),
            "😂", None, 56677, 67.13, ["Dan, Ella", "George"],
            datetime.date(2018, 3, 17), None, first_and_last_uuid,
        ],
    ]

    for expected_row in expected_rows:
        assert results.get_next() == expected_row
129
+
130
+
131
def test_scan_pandas(conn_db_empty: ConnDB) -> None:
    """Scan a DataFrame with many column dtypes, once by variable name and once as ``$df``.

    Both result sets are checked with ``validate_scan_pandas_results``.
    """
    conn, _db = conn_db_empty

    # The same four instants, written with 9, 6 and 0 fractional digits.
    stamps_9_digits = [
        "1996-04-01T12:00:11.500001000",
        "1981-11-13T22:02:52.000002000",
        "1972-12-21T12:05:44.500003000",
        "2008-01-11T22:10:03.000004000",
    ]
    stamps_6_digits = [
        "1996-04-01T12:00:11.500001",
        "1981-11-13T22:02:52.000002",
        "1972-12-21T12:05:44.500003",
        "2008-01-11T22:10:03.000004",
    ]
    stamps_whole_seconds = [
        "1996-04-01T12:00:11",
        "1981-11-13T22:02:52",
        "1972-12-21T12:05:44",
        "2008-01-11T22:10:03",
    ]

    def to_datetime_array(texts, unit):
        # Parse every literal with np.datetime64, then cast the whole array to ``unit``.
        return np.array([np.datetime64(text) for text in texts]).astype(unit)

    # Column order matters: validate_scan_pandas_results compares whole rows positionally.
    columns = {}
    columns["BOOL"] = [True, False, None, False]
    columns["UINT8"] = np.array([1, 2, 3, 4], dtype=np.uint8)
    columns["UINT16"] = np.array([10, 20, 30, 40], dtype=np.uint16)
    columns["UINT32"] = np.array([100, 200, 300, 400], dtype=np.uint32)
    columns["UINT64"] = np.array([1000, 2000, 3000, 4000], dtype=np.uint64)
    columns["INT8"] = np.array([-1, -2, -3, -4], dtype=np.int8)
    columns["INT16"] = np.array([-10, -20, -30, -40], dtype=np.int16)
    columns["INT32"] = np.array([-100, -200, -300, -400], dtype=np.int32)
    columns["INT64"] = np.array([-1000, -2000, -3000, -4000], dtype=np.int64)
    columns["FLOAT_32"] = np.array(
        [-0.5199999809265137, float("nan"), -3.299999952316284, 4.400000095367432],
        dtype=np.float32,
    )
    columns["FLOAT_64"] = np.array([5132.12321, 24.222, float("nan"), 4.444], dtype=np.float64)
    columns["datetime_microseconds"] = to_datetime_array(stamps_9_digits, "datetime64[us]")
    columns["datetime_microseconds_tz"] = to_datetime_array(stamps_9_digits, "datetime64[us]")
    columns["datetime_nanoseconds"] = to_datetime_array(stamps_6_digits, "datetime64[ns]")
    columns["datetime_milliseconds"] = to_datetime_array(stamps_6_digits, "datetime64[ms]")
    columns["datetime_seconds"] = to_datetime_array(stamps_whole_seconds, "datetime64[s]")
    columns["timedelta_nanoseconds"] = [
        np.timedelta64(nanos, "ns") for nanos in (500000, 1000000000, 2500000000, 3022000000)
    ]
    columns["name"] = [None, "Ascii only", "ñ中国字", "😂"]
    columns["worked_hours"] = [[], [40, 20, 10], [30, None], None]
    columns["int_object"] = np.array([528, -9999, None, 56677], dtype=object)
    columns["float_object"] = np.array([3.562, 4.213, None, 67.13], dtype=object)
    columns["used_names"] = np.array([["Alice", None], [], None, ["Dan, Ella", "George"]], dtype=object)
    columns["past_date"] = np.array(
        [
            datetime.date(1996, 2, 15),
            datetime.date(2013, 2, 22),
            datetime.date(2055, 1, 14),
            datetime.date(2018, 3, 17),
        ],
        dtype=object,
    )
    columns["mixed_type"] = np.array([12331, "test string", 5.623, None], dtype="object")
    columns["uuid_type"] = [
        UUID("d5a8ed71-6fc4-4cb3-acbc-2f5b73fd14bc"),
        UUID("9a2fc988-5c5d-4217-af9e-220aef5ce7b8"),
        UUID("166055ee-a481-4e67-a4fc-98682d3a3e20"),
        UUID("d5a8ed71-6fc4-4cb3-acbc-2f5b73fd14bc"),
    ]

    # NOTE: the local must be called ``df`` because the first query refers to it by name.
    df = pd.DataFrame(columns)
    localized = df["datetime_microseconds_tz"].dt.tz_localize("US/Eastern")
    df["datetime_microseconds_tz"] = localized

    by_name = conn.execute("LOAD FROM df RETURN *")
    validate_scan_pandas_results(by_name)

    by_parameter = conn.execute("LOAD FROM $df RETURN *", {"df": df})
    validate_scan_pandas_results(by_parameter)
213
+
214
+
215
def test_scan_pandas_timestamp(conn_db_empty: ConnDB) -> None:
    """Scan an object-dtype column of ``datetime.datetime`` values that includes a ``None``."""
    conn, _ = conn_db_empty
    ts = np.array(
        [
            datetime.datetime(1996, 2, 15, hour=12, minute=22, second=54),
            datetime.datetime(2011, 3, 11, minute=11, hour=5),
            None,
            datetime.datetime(2033, 2, 11, microsecond=55),
        ],
        dtype="object",
    )
    df = pd.DataFrame({"timestamp": ts})
    # Pandas automatically converts the column from object to timestamp, so we need to manually cast back to object.
    # The ``copy`` keyword of ``astype`` is deprecated in pandas 3.0, so it is not passed here;
    # the test only needs the resulting dtype, not a shared buffer.
    df = df.astype({"timestamp": "object"})
    results = conn.execute("LOAD FROM df RETURN *")
    assert results.get_next() == [datetime.datetime(1996, 2, 15, hour=12, minute=22, second=54)]
    assert results.get_next() == [datetime.datetime(2011, 3, 11, minute=11, hour=5)]
    assert results.get_next() == [None]
    assert results.get_next() == [datetime.datetime(2033, 2, 11, microsecond=55)]
234
+
235
+
236
def test_replace_failure(conn_db_empty: ConnDB) -> None:
    """Check the errors raised when a scan source cannot be resolved to a Python object."""
    conn, _db = conn_db_empty

    # No Python variable called ``x`` exists in scope.
    unknown_variable_pattern = re.escape("Binder exception: Variable x is not in scope.")
    with pytest.raises(RuntimeError, match=unknown_variable_pattern):
        conn.execute("LOAD FROM x RETURN *;")

    # READ_PANDAS is called with a string literal instead of a DataFrame.
    wrong_argument_pattern = re.escape(
        "Binder exception: Function READ_PANDAS did not receive correct arguments:\n"
        "Actual: (STRING)\n"
        "Expected: (POINTER)\n"
    )
    with pytest.raises(RuntimeError, match=wrong_argument_pattern):
        conn.execute("CALL READ_PANDAS('df213') WHERE id > 20 RETURN id + 5, weight")
251
+
252
+
253
def test_int64_overflow(conn_db_empty: ConnDB) -> None:
    """Scanning a Python integer too large for INT64 must raise a conversion error."""
    conn, _db = conn_db_empty
    # NOTE: the query below refers to this local by name, so it must stay ``overflowpd``.
    overflowpd = pd.DataFrame({"id": [4, 2**125]})
    expected_message = (
        "Conversion exception: Failed to cast value: "
        "Python value '42535295865117307932921825928971026432' to INT64"
    )
    with pytest.raises(RuntimeError, match=re.escape(expected_message)):
        conn.execute("LOAD FROM overflowpd RETURN *;")
264
+
265
+
266
def test_scan_pandas_with_filter(conn_db_empty: ConnDB) -> None:
    """Scan a DataFrame with a WHERE clause and a projected expression."""
    conn, _db = conn_db_empty
    df = pd.DataFrame(
        {
            "id": np.array([22, 3, 100], dtype=np.uint8),
            "weight": np.array([23.2, 31.7, 42.9], dtype=np.float64),
            "name": ["ñ", "日本字", "😊"],
        }
    )
    # Dummy query to ensure the READ_PANDAS function is persistent after a write transaction.
    conn.execute("CREATE NODE TABLE PERSON1(ID INT64, PRIMARY KEY(ID))")
    results = conn.execute("LOAD FROM df WHERE id > 20 RETURN id + 5, weight, name")
    for expected_row in ([27, 23.2, "ñ"], [105, 42.9, "😊"]):
        assert results.get_next() == expected_row
279
+
280
+
281
def test_large_pd(conn_db_empty: ConnDB) -> None:
    """Round-trip a 40000-row, two-column int64 DataFrame through a scan and ``get_as_df``."""
    conn, _db = conn_db_empty
    num_rows = 40000
    # 1, 3, 5, ... and 0, 2, 4, ... — ``num_rows`` values each.
    odd_numbers = list(range(1, 2 * num_rows, 2))
    even_numbers = list(range(0, 2 * num_rows, 2))
    odd_column = np.array(odd_numbers, dtype=np.int64)
    even_column = np.array(even_numbers, dtype=np.int64)
    df = pd.DataFrame({"odd": odd_column, "even": even_column})
    scanned = conn.execute("LOAD FROM df RETURN *").get_as_df()
    assert scanned["odd"].to_list() == odd_numbers
    assert scanned["even"].to_list() == even_numbers
293
+
294
+
295
def test_pandas_scan_demo(conn_db_empty: ConnDB) -> None:
    """Combine a DataFrame scan with a MATCH, then load the same DataFrame into a node table.

    The DataFrame local is deliberately called ``person`` because the queries
    refer to it by that name. The ``id``/``age``/``height``/``is_student``
    names inside the CREATE query are DataFrame columns.
    """
    conn, _ = conn_db_empty

    conn.execute("CREATE NODE TABLE student (ID int64, height int32, PRIMARY KEY(ID))")
    conn.execute("CREATE (s:student {ID: 0, height: 70})")
    conn.execute("CREATE (s:student {ID: 2, height: 64})")
    conn.execute("CREATE (s:student {ID: 4, height: 67})")
    conn.execute("CREATE (s:student {ID: 5, height: 64})")

    # Named ``ids`` rather than ``id`` so the builtin ``id`` is not shadowed.
    ids = np.array([0, 2, 3, 5, 7, 11, 13], dtype=np.int64)
    age = np.array([42, 23, 33, 57, 67, 39, 11], dtype=np.uint16)
    height_in_cm = np.array([167, 172, 183, 199, 149, 154, 165], dtype=np.uint32)
    is_student = np.array([False, True, False, False, False, False, True], dtype=bool)
    person = pd.DataFrame({"id": ids, "age": age, "height": height_in_cm, "is_student": is_student})

    result = conn.execute(
        "LOAD FROM person with avg(height / 2.54) as height_in_inch MATCH (s:student) WHERE s.height > "
        "height_in_inch RETURN s"
    ).get_as_df()
    assert len(result) == 2
    assert result["s"][0] == {
        "ID": 0,
        ID: {"offset": 0, "table": 0},
        LABEL: "student",
        "height": 70,
    }
    assert result["s"][1] == {
        "ID": 4,
        ID: {"offset": 2, "table": 0},
        LABEL: "student",
        "height": 67,
    }

    conn.execute("CREATE NODE TABLE person(ID INT64, age UINT16, height UINT32, is_student BOOLean, PRIMARY KEY(ID))")
    conn.execute("LOAD FROM person CREATE (p:person {ID: id, age: age, height: height, is_student: is_student})")
    result = conn.execute("MATCH (p:person) return p.*").get_as_df()
    # list == ndarray compares element-wise; np.all folds that into one truth value.
    assert np.all(result["p.ID"].to_list() == ids)
    assert np.all(result["p.age"].to_list() == age)
    assert np.all(result["p.height"].to_list() == height_in_cm)
    assert np.all(result["p.is_student"].to_list() == is_student)
335
+
336
+
337
def test_scan_pandas_copy_subquery(conn_db_empty: ConnDB) -> None:
    """COPY a node table from a ``LOAD FROM df`` subquery and read the rows back."""
    conn, _db = conn_db_empty
    id_column = np.array([22, 3, 100], dtype=np.int64)
    df = pd.DataFrame({"id": id_column, "name": ["A", "B", "C"]})
    conn.execute("CREATE NODE TABLE person(ID INT64, NAME STRING, PRIMARY KEY(ID))")
    conn.execute("COPY person FROM (LOAD FROM df RETURN *)")
    stored = conn.execute("MATCH (p:person) RETURN p.*").get_as_df()
    assert stored["p.ID"].to_list() == [22, 3, 100]
    assert stored["p.NAME"].to_list() == ["A", "B", "C"]
346
+
347
+
348
def test_scan_all_null(conn_db_empty: ConnDB) -> None:
    """Scan an object column that contains only ``None`` values."""
    conn, _db = conn_db_empty
    all_none = np.array([None, None, None], dtype=object)
    df = pd.DataFrame({"id": all_none})
    result = conn.execute("LOAD FROM df RETURN *")
    # Three input rows, each expected back as a single null value.
    for _row_index in range(3):
        assert result.get_next() == [None]
356
+
357
+
358
def test_copy_from_scan_pandas_result(conn_db_empty: ConnDB) -> None:
    """COPY from a filtered DataFrame scan; only the row with ``age < 30`` is stored."""
    conn, _db = conn_db_empty
    names = ["Adam", "Karissa", "Zhang", "Noura"]
    ages = [30, 40, 50, 25]
    df = pd.DataFrame({"name": names, "age": ages})
    conn.execute("CREATE NODE TABLE Person(name STRING, age INT64, PRIMARY KEY (name));")
    conn.execute("COPY Person FROM (LOAD FROM df WHERE age < 30 RETURN *);")
    stored = conn.execute("match (p:Person) return p.*")
    assert stored.get_next() == ["Noura", 25]
    assert stored.has_next() is False
366
+
367
+
368
def test_scan_from_py_arrow_pandas(conn_db_empty: ConnDB) -> None:
    """Scan a DataFrame whose columns were converted to the pyarrow dtype backend."""
    conn, _db = conn_db_empty
    plain = pd.DataFrame({"name": ["Adam", "Karissa", "Zhang", "Noura"], "age": [30, 40, 50, 25]})
    df = plain.convert_dtypes(dtype_backend="pyarrow")
    result = conn.execute("LOAD FROM df RETURN *;")
    expected_rows = [["Adam", 30], ["Karissa", 40], ["Zhang", 50], ["Noura", 25]]
    for expected_row in expected_rows:
        assert result.get_next() == expected_row
    assert result.has_next() is False
379
+
380
+
381
def test_scan_long_utf8_string(conn_db_empty: ConnDB) -> None:
    """Filter a scanned DataFrame on a multi-byte UTF-8 string and count the matches."""
    conn, _db = conn_db_empty
    df = pd.DataFrame({"name": ["很长的一段中文", "短", "非常长的中文"]})
    match_count = conn.execute("LOAD FROM df WHERE name = '非常长的中文' RETURN count(*);")
    assert match_count.get_next() == [1]
387
+
388
+
389
def test_copy_from_pandas_object(conn_db_empty: ConnDB) -> None:
    """COPY a node table and then a rel table directly from DataFrames.

    The integer ``age`` column is copied into a STRING property, so the
    values are expected back as strings.
    """
    conn, _db = conn_db_empty

    # Node table.
    df = pd.DataFrame({"name": ["Adam", "Karissa", "Zhang", "Noura"], "age": [30, 40, 50, 25]})
    conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));")
    conn.execute("COPY Person FROM df;")
    people = conn.execute("match (p:Person) return p.*")
    for expected_row in (["Adam", "30"], ["Karissa", "40"], ["Zhang", "50"], ["Noura", "25"]):
        assert people.get_next() == expected_row
    assert people.has_next() is False

    # Rel table; ``df`` is rebound because the COPY statement refers to it by name.
    df = pd.DataFrame({"f": ["Adam", "Karissa"], "t": ["Zhang", "Zhang"]})
    conn.execute("CREATE REL TABLE Knows(FROM Person TO Person);")
    conn.execute("COPY Knows FROM df")
    sources = conn.execute("match (p:Person)-[]->(:Person {name: 'Zhang'}) return p.*")
    for expected_row in (["Adam", "30"], ["Karissa", "40"]):
        assert sources.get_next() == expected_row
    assert sources.has_next() is False
407
+
408
+
409
def test_copy_from_pandas_object_skip(conn_db_empty: ConnDB) -> None:
    """COPY from a DataFrame with ``SKIP``: the leading rows are not loaded."""
    conn, _db = conn_db_empty

    # SKIP=2 on the node table leaves only the last two people.
    df = pd.DataFrame({"name": ["Adam", "Karissa", "Zhang", "Noura"], "age": [30, 40, 50, 25]})
    conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));")
    conn.execute("COPY Person FROM df(SKIP=2);")
    people = conn.execute("match (p:Person) return p.*")
    for expected_row in (["Zhang", "50"], ["Noura", "25"]):
        assert people.get_next() == expected_row
    assert people.has_next() is False

    # SKIP=1 on the rel table leaves only the Noura -> Zhang edge.
    df = pd.DataFrame({"f": ["Adam", "Noura"], "t": ["Zhang", "Zhang"]})
    conn.execute("CREATE REL TABLE Knows(FROM Person TO Person);")
    conn.execute("COPY Knows FROM df(SKIP=1)")
    sources = conn.execute("match (p:Person)-[]->(:Person {name: 'Zhang'}) return p.*")
    assert sources.get_next() == ["Noura", "25"]
    assert sources.has_next() is False
424
+
425
+
426
def test_copy_from_pandas_object_limit(conn_db_empty: ConnDB) -> None:
    """COPY from a DataFrame with ``LIMIT``: only the leading rows are loaded."""
    conn, _db = conn_db_empty

    # LIMIT=2 on the node table keeps the first two people.
    df = pd.DataFrame({"name": ["Adam", "Karissa", "Zhang", "Noura"], "age": [30, 40, 50, 25]})
    conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));")
    conn.execute("COPY Person FROM df(LIMIT=2);")
    people = conn.execute("match (p:Person) return p.*")
    for expected_row in (["Adam", "30"], ["Karissa", "40"]):
        assert people.get_next() == expected_row
    assert people.has_next() is False

    # LIMIT=1 on the rel table keeps only the Adam -> Karissa edge.
    df = pd.DataFrame({"f": ["Adam", "Zhang"], "t": ["Karissa", "Karissa"]})
    conn.execute("CREATE REL TABLE Knows(FROM Person TO Person);")
    conn.execute("COPY Knows FROM df(LIMIT=1)")
    sources = conn.execute("match (p:Person)-[]->(:Person {name: 'Karissa'}) return p.*")
    assert sources.get_next() == ["Adam", "30"]
    assert sources.has_next() is False
441
+
442
+
443
def test_copy_from_pandas_object_skip_and_limit(conn_db_empty: ConnDB) -> None:
    """COPY from a DataFrame with ``SKIP=1, LIMIT=2``: the two middle rows are loaded."""
    conn, _db = conn_db_empty
    names = ["Adam", "Karissa", "Zhang", "Noura"]
    ages = [30, 40, 50, 25]
    df = pd.DataFrame({"name": names, "age": ages})
    conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));")
    conn.execute("COPY Person FROM df(SKIP=1, LIMIT=2);")
    people = conn.execute("match (p:Person) return p.*")
    for expected_row in (["Karissa", "40"], ["Zhang", "50"]):
        assert people.get_next() == expected_row
    assert people.has_next() is False
452
+
453
+
454
def test_copy_from_pandas_object_skip_bounds_check(conn_db_empty: ConnDB) -> None:
    """A ``SKIP`` larger than the DataFrame's row count loads nothing."""
    conn, _db = conn_db_empty
    names = ["Adam", "Karissa", "Zhang", "Noura"]
    ages = [30, 40, 50, 25]
    df = pd.DataFrame({"name": names, "age": ages})
    conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));")
    conn.execute("COPY Person FROM df(SKIP=10);")
    people = conn.execute("match (p:Person) return p.*")
    assert people.has_next() is False
461
+
462
+
463
def test_copy_from_pandas_object_limit_bounds_check(conn_db_empty: ConnDB) -> None:
    """A ``LIMIT`` larger than the DataFrame's row count loads every row."""
    conn, _db = conn_db_empty
    names = ["Adam", "Karissa", "Zhang", "Noura"]
    ages = [30, 40, 50, 25]
    df = pd.DataFrame({"name": names, "age": ages})
    conn.execute("CREATE NODE TABLE Person(name STRING, age STRING, PRIMARY KEY (name));")
    conn.execute("COPY Person FROM df(LIMIT=10);")
    people = conn.execute("match (p:Person) return p.*")
    for expected_row in (["Adam", "30"], ["Karissa", "40"], ["Zhang", "50"], ["Noura", "25"]):
        assert people.get_next() == expected_row
    assert people.has_next() is False
474
+
475
+
476
def test_copy_from_pandas_date(conn_db_empty: ConnDB) -> None:
    """COPY ``pd.Timestamp`` values into a TIMESTAMP property and read them back as datetimes."""
    conn, _db = conn_db_empty
    stamps = [pd.Timestamp("2024-01-03"), pd.Timestamp("2023-10-10")]
    df = pd.DataFrame({"id": [1, 2], "date": stamps})
    conn.execute("CREATE NODE TABLE Person(id INT16, d TIMESTAMP, PRIMARY KEY (id));")
    conn.execute("COPY Person FROM df;")
    people = conn.execute("match (p:Person) return p.*")
    expected_rows = (
        [1, datetime.datetime(2024, 1, 3)],
        [2, datetime.datetime(2023, 10, 10)],
    )
    for expected_row in expected_rows:
        assert people.get_next() == expected_row
    assert people.has_next() is False
485
+
486
+
487
def test_scan_string_to_nested(conn_db_empty: ConnDB) -> None:
    """COPY string columns into INT64, list, map, struct and nested-list properties."""
    conn, _db = conn_db_empty
    # Every cell is a string; the target column types come from the table definition.
    cells = {
        "id": "1",
        "lstcol": "[1,2,3]",
        "mapcol": "{'a'=1,'b'=2}",
        "structcol": "{a:1,b:2}",
        "lstlstcol": "[[],[1,2,3],[4,5,6]]",
    }
    df = pd.DataFrame({column: [text] for column, text in cells.items()})
    create_table = "CREATE NODE TABLE tab(id INT64, lstcol INT64[], mapcol MAP(STRING, INT64), structcol STRUCT(a INT64, b INT64), lstlstcol INT64[][], PRIMARY KEY(id))"
    conn.execute(create_table)
    conn.execute("COPY tab from df")
    stored = conn.execute("match (t:tab) return t.*")
    expected_row = [
        1,
        [1, 2, 3],
        {"'a'": 1, "'b'": 2},
        {"a": 1, "b": 2},
        [[], [1, 2, 3], [4, 5, 6]],
    ]
    assert stored.get_next() == expected_row
    assert not stored.has_next()
509
+
510
+
511
def test_pandas_scan_ignore_errors(conn_db_empty: ConnDB) -> None:
    """COPY with ``IGNORE_ERRORS=true`` skips a duplicate primary key and records a warning."""
    conn, _db = conn_db_empty
    # The last value repeats the first primary key.
    frame = pd.DataFrame({"id": [1, 2, 3, 1]})
    conn.execute("CREATE NODE TABLE person(id INT64, PRIMARY KEY(id))")
    conn.execute("COPY person FROM $dataframe(IGNORE_ERRORS=true)", {"dataframe": frame})

    stored = conn.execute("MATCH (p:person) RETURN p.id")
    for expected_id in (1, 2, 3):
        assert stored.get_next() == [expected_id]
    assert not stored.has_next()

    warning_rows = conn.execute("CALL show_warnings() RETURN *")
    first_warning = warning_rows.get_next()
    assert first_warning[1].startswith("Found duplicated primary key value 1")
    assert not warning_rows.has_next()
526
+
527
+
528
def test_pandas_scan_ignore_errors_docs_example(conn_db_empty: ConnDB) -> None:
    """COPY with ``ignore_errors=true`` keeps only the first "Rhea" row and the "Alice" row.

    The input also contains a repeated "Rhea" and a ``None`` name; neither is
    expected in the stored result.
    """
    conn, _db = conn_db_empty
    frame = pd.DataFrame(
        {
            "name": ["Rhea", "Alice", "Rhea", None],
            "age": [25, 23, 25, 24],
        }
    )
    conn.execute("CREATE NODE TABLE Person(name STRING PRIMARY KEY, age INT64)")
    conn.execute("COPY Person FROM $dataframe (ignore_errors=true)", {"dataframe": frame})

    stored = conn.execute("MATCH (p:Person) RETURN p.name, p.age")
    for expected_row in (["Rhea", 25], ["Alice", 23]):
        assert stored.get_next() == expected_row
    assert not stored.has_next()
541
+
542
+
543
def test_copy_from_pandas_multi_pairs(conn_db_empty: ConnDB) -> None:
    """COPY into a rel table declared with two FROM/TO pairs, selecting the person-person pair."""
    conn, _db = conn_db_empty
    setup_statements = (
        "CREATE NODE TABLE person(id INT64, PRIMARY KEY(id))",
        "CREATE (p:person {id: 3});",
        "CREATE (p:person {id: 4});",
        "CREATE NODE TABLE student(id INT64, PRIMARY KEY(id))",
        "CREATE (p:student {id: 2});",
        "CREATE REL TABLE knows(from person to person, from person to student, length int64)",
    )
    for statement in setup_statements:
        conn.execute(statement)

    df = pd.DataFrame({"from": [3], "to": [4], "length": [252]})
    conn.execute("COPY knows from df (from = 'person', to = 'person');")
    edges = conn.execute("match (:person)-[e:knows]->(:person) return e.*")
    assert edges.has_next()
    only_edge = edges.get_next()
    assert only_edge[0] == 252
    assert not edges.has_next()
557
+
558
+
559
def test_scan_pandas_with_exists(conn_db_empty: ConnDB) -> None:
    """COPY rels from a DataFrame scan filtered by a ``NOT EXISTS`` subquery."""
    conn, _db = conn_db_empty
    setup_statements = (
        "CREATE NODE TABLE person(id INT64, PRIMARY KEY(id))",
        "CREATE (p:person {id: 1})",
        "CREATE (p:person {id: 2})",
        "CREATE (p:person {id: 3})",
        "CREATE REL TABLE knows(from person to person)",
    )
    for statement in setup_statements:
        conn.execute(statement)

    df = pd.DataFrame({"from": [1, 2, 3], "to": [3, 2, 1]})
    copy_statement = "COPY knows from (load from df where not exists {MATCH (p:person)-[:knows]->(p1:person) WHERE p.id = from AND p1.id = to} return from + 1 - 1, to)"
    conn.execute(copy_statement)

    res = conn.execute("MATCH (p:person)-[:knows]->(p1:person) return p.id, p1.id order by p.id, p1.id")
    assert res.has_next()
    # Expected (source id, destination id) pairs, in query order.
    for expected_source, expected_target in ((1, 3), (2, 2), (3, 1)):
        pair = res.get_next()
        assert pair[0] == expected_source
        assert pair[1] == expected_target
584
+
585
+
586
def test_scan_empty_list(conn_db_empty: ConnDB) -> None:
    """Scan a DataFrame whose list column holds a single empty list."""
    conn, _db = conn_db_empty
    df = pd.DataFrame({"id": ["1"], "lstcol": [[]]})
    res = conn.execute("load from df return *")
    assert res.has_next()
    row = res.get_next()
    scanned_id = row[0]
    scanned_list = row[1]
    assert scanned_id == "1"
    assert scanned_list == []
594
+
595
+
596
def test_scan_py_dict_struct_format(conn_db_empty: ConnDB) -> None:
    """Scan a column of dicts sharing the same keys (plus a ``None``) and read dicts back."""
    conn, _db = conn_db_empty
    ids = [1, 3, 4]
    dicts = [{"key1": 5, "key3": 4}, {"key1": 10, "key3": 25}, None]
    df = pd.DataFrame({"id": ids, "dt": dicts})
    res = conn.execute("LOAD FROM df RETURN *")

    row = res.get_next()
    assert row[0] == 1
    assert row[1] == {"key1": 5, "key3": 4}

    row = res.get_next()
    assert row[0] == 3
    assert row[1] == {"key1": 10, "key3": 25}

    row = res.get_next()
    assert row[0] == 4
    assert row[1] is None
609
+
610
+
611
def test_scan_py_dict_map_format(conn_db_empty: ConnDB) -> None:
    """Scan dicts shaped as ``{"key": [...], "value": [...]}`` and read them back as maps."""
    conn, _db = conn_db_empty
    key_value_dicts = [
        {"key": ["Alice", "Bob"], "value": [32, 41]},
        {"key": ["Carol"], "value": [2]},
        {"key": ["zoo", "ela", "dan"], "value": [44, 52, 88]},
    ]
    df = pd.DataFrame({"id": [1, 3, 4], "dt": key_value_dicts})
    res = conn.execute("LOAD FROM df RETURN *")
    expected_rows = (
        (1, {"Alice": 32, "Bob": 41}),
        (3, {"Carol": 2}),
        (4, {"zoo": 44, "ela": 52, "dan": 88}),
    )
    for expected_id, expected_map in expected_rows:
        row = res.get_next()
        assert row[0] == expected_id
        assert row[1] == expected_map

    # If key and value size don't match, lbug sniffs it as struct.
    mismatched = {"key": ["Alice", "Bob"], "value": []}
    df = pd.DataFrame({"id": [4], "dt": [mismatched]})
    res = conn.execute("LOAD FROM df RETURN *")
    row = res.get_next()
    assert row[0] == 4
    assert row[1] == {"key": ["Alice", "Bob"], "value": []}
638
+
639
+
640
def test_scan_py_dict_empty(conn_db_empty: ConnDB) -> None:
    """Scanning a DataFrame with two empty columns yields no rows."""
    conn, _db = conn_db_empty
    empty_columns = {"id": [], "dt": []}
    df = pd.DataFrame(empty_columns)
    res = conn.execute("LOAD FROM df RETURN *")
    assert not res.has_next()
645
+
646
+
647
def test_df_with_struct_cast(conn_db_readonly: ConnDB) -> None:
    """Scan dict columns whose entries disagree on keys or value types.

    In each of the three cases the scanned values are expected back as strings.
    """
    conn, _db = conn_db_readonly

    # Case 1: one dict uses a different key, and the ``qwe`` column mixes ints with a bool.
    df = pd.DataFrame({"test": [{"a": 1}, {"a": 2}, {"a": 3}, {"b": "abc"}], "qwe": [1, 2, 3, False]})
    res = conn.execute("load from df return test, qwe")
    expected_pairs = (
        ("{'a': 1}", "1"),
        ("{'a': 2}", "2"),
        ("{'a': 3}", "3"),
        ("{'b': 'abc'}", "False"),
    )
    for expected_test, expected_qwe in expected_pairs:
        row = res.get_next()
        assert row[0] == expected_test
        assert row[1] == expected_qwe

    # Case 2: the second dict lacks a key that the first one has.
    df = pd.DataFrame({"test": [{"a": 1, "b": 4}, {"a": 2}]})
    res = conn.execute("load from df return test")
    for expected_test in ("{'a': 1, 'b': 4}", "{'a': 2}"):
        row = res.get_next()
        assert row[0] == expected_test

    # Case 3: same key, but the value is an int in one dict and a str in the other.
    df = pd.DataFrame({"test": [{"a": 1}, {"a": "2"}]})
    res = conn.execute("load from df return test")
    for expected_test in ("{'a': 1}", "{'a': '2'}"):
        row = res.get_next()
        assert row[0] == expected_test