qql-cli 2.1.0__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {qql_cli-2.1.0 → qql_cli-2.2.0}/PKG-INFO +15 -6
- {qql_cli-2.1.0 → qql_cli-2.2.0}/README.md +14 -5
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/getting-started.md +8 -2
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/index.html +3 -3
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/programmatic.md +18 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/reference.md +8 -3
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/search.md +57 -5
- {qql_cli-2.1.0 → qql_cli-2.2.0}/pyproject.toml +1 -1
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/ast_nodes.py +17 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/cli.py +32 -1
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/executor.py +76 -2
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/lexer.py +12 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/parser.py +72 -13
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/script.py +3 -1
- {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_executor.py +120 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_lexer.py +22 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_parser.py +70 -1
- {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_script.py +24 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/.github/workflows/ci.yml +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/.github/workflows/publish.yml +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/.gitignore +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/LICENSE +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/_config.yml +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/collections.md +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/filters.md +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/insert.md +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/robots.txt +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/scripts.md +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/sitemap.xml +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/main.py +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/resources/Features.md +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/resources/sample.qql +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/resources/sample_v2.qql +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/__init__.py +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/config.py +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/dumper.py +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/embedder.py +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/exceptions.py +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/__init__.py +0 -0
- {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_dumper.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: qql-cli
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), WHERE clause filters, script execution, and collection dump/restore.
|
|
5
5
|
Project-URL: Homepage, https://github.com/pavanjava/qql
|
|
6
6
|
Project-URL: Repository, https://github.com/pavanjava/qql
|
|
@@ -56,9 +56,9 @@ Description-Content-Type: text/markdown
|
|
|
56
56
|
[](https://pypi.org/project/qql-cli/)
|
|
57
57
|
[](https://pypi.org/project/qql-cli/)
|
|
58
58
|
[](LICENSE)
|
|
59
|
-
[](tests/)
|
|
60
60
|
|
|
61
|
-
Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
|
|
61
|
+
Write `INSERT`, `SELECT`, `SEARCH`, `SCROLL`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
|
|
62
62
|
|
|
63
63
|
```
|
|
64
64
|
qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
|
|
@@ -99,7 +99,7 @@ Your query string
|
|
|
99
99
|
Qdrant instance
|
|
100
100
|
```
|
|
101
101
|
|
|
102
|
-
When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) to merge the results of both retrieval methods.
|
|
102
|
+
When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) by default to merge the results of both retrieval methods. You can switch hybrid search to DBSF with `FUSION 'dbsf'`.
|
|
103
103
|
|
|
104
104
|
---
|
|
105
105
|
|
|
@@ -133,7 +133,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith
|
|
|
133
133
|
|---|---|
|
|
134
134
|
| [Getting Started](docs/getting-started.md) | Installation, connecting, first queries |
|
|
135
135
|
| [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types |
|
|
136
|
-
| [SEARCH / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, hybrid, reranking, recommendations |
|
|
136
|
+
| [SEARCH / SELECT / SCROLL / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, point retrieval, pagination, hybrid, reranking, recommendations |
|
|
137
137
|
| [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
|
|
138
138
|
| [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX |
|
|
139
139
|
| [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
|
|
@@ -153,11 +153,20 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}]
|
|
|
153
153
|
SEARCH articles SIMILAR TO 'query' LIMIT 10
|
|
154
154
|
SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020
|
|
155
155
|
SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID
|
|
156
|
+
SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf'
|
|
156
157
|
SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID RERANK
|
|
157
158
|
|
|
159
|
+
-- Scroll
|
|
160
|
+
SCROLL FROM articles LIMIT 50
|
|
161
|
+
SCROLL FROM articles WHERE year >= 2024 LIMIT 50
|
|
162
|
+
SCROLL FROM articles AFTER 'cursor-id' LIMIT 50
|
|
163
|
+
|
|
158
164
|
-- Recommend
|
|
159
165
|
RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
|
|
160
166
|
|
|
167
|
+
-- Select (retrieve a point by ID)
|
|
168
|
+
SELECT * FROM articles WHERE id = '3f2e1a4b-...'
|
|
169
|
+
|
|
161
170
|
-- Collections
|
|
162
171
|
CREATE COLLECTION articles
|
|
163
172
|
CREATE COLLECTION articles HYBRID
|
|
@@ -188,7 +197,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
|
|
|
188
197
|
pytest tests/ -v
|
|
189
198
|
```
|
|
190
199
|
|
|
191
|
-
Expected: **375 tests passing**.
|
|
200
|
+
Expected: **405 tests passing**.
|
|
192
201
|
|
|
193
202
|
---
|
|
194
203
|
|
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
[](https://pypi.org/project/qql-cli/)
|
|
6
6
|
[](https://pypi.org/project/qql-cli/)
|
|
7
7
|
[](LICENSE)
|
|
8
|
-
[](tests/)
|
|
9
9
|
|
|
10
|
-
Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
|
|
10
|
+
Write `INSERT`, `SELECT`, `SEARCH`, `SCROLL`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
|
|
11
11
|
|
|
12
12
|
```
|
|
13
13
|
qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
|
|
@@ -48,7 +48,7 @@ Your query string
|
|
|
48
48
|
Qdrant instance
|
|
49
49
|
```
|
|
50
50
|
|
|
51
|
-
When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) to merge the results of both retrieval methods.
|
|
51
|
+
When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) by default to merge the results of both retrieval methods. You can switch hybrid search to DBSF with `FUSION 'dbsf'`.
|
|
52
52
|
|
|
53
53
|
---
|
|
54
54
|
|
|
@@ -82,7 +82,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith
|
|
|
82
82
|
|---|---|
|
|
83
83
|
| [Getting Started](docs/getting-started.md) | Installation, connecting, first queries |
|
|
84
84
|
| [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types |
|
|
85
|
-
| [SEARCH / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, hybrid, reranking, recommendations |
|
|
85
|
+
| [SEARCH / SELECT / SCROLL / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, point retrieval, pagination, hybrid, reranking, recommendations |
|
|
86
86
|
| [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
|
|
87
87
|
| [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX |
|
|
88
88
|
| [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
|
|
@@ -102,11 +102,20 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}]
|
|
|
102
102
|
SEARCH articles SIMILAR TO 'query' LIMIT 10
|
|
103
103
|
SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020
|
|
104
104
|
SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID
|
|
105
|
+
SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf'
|
|
105
106
|
SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID RERANK
|
|
106
107
|
|
|
108
|
+
-- Scroll
|
|
109
|
+
SCROLL FROM articles LIMIT 50
|
|
110
|
+
SCROLL FROM articles WHERE year >= 2024 LIMIT 50
|
|
111
|
+
SCROLL FROM articles AFTER 'cursor-id' LIMIT 50
|
|
112
|
+
|
|
107
113
|
-- Recommend
|
|
108
114
|
RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
|
|
109
115
|
|
|
116
|
+
-- Select (retrieve a point by ID)
|
|
117
|
+
SELECT * FROM articles WHERE id = '3f2e1a4b-...'
|
|
118
|
+
|
|
110
119
|
-- Collections
|
|
111
120
|
CREATE COLLECTION articles
|
|
112
121
|
CREATE COLLECTION articles HYBRID
|
|
@@ -137,7 +146,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
|
|
|
137
146
|
pytest tests/ -v
|
|
138
147
|
```
|
|
139
148
|
|
|
140
|
-
Expected: **375 tests passing**.
|
|
149
|
+
Expected: **405 tests passing**.
|
|
141
150
|
|
|
142
151
|
---
|
|
143
152
|
|
|
@@ -24,7 +24,7 @@ Your query string
|
|
|
24
24
|
Qdrant instance
|
|
25
25
|
```
|
|
26
26
|
|
|
27
|
-
When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) to merge the results of both retrieval methods.
|
|
27
|
+
When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) by default to merge the results of both retrieval methods. You can override that with `FUSION 'dbsf'` on hybrid searches.
|
|
28
28
|
|
|
29
29
|
---
|
|
30
30
|
|
|
@@ -138,8 +138,14 @@ SEARCH notes SIMILAR TO 'vector storage engines' LIMIT 3
|
|
|
138
138
|
-- Filter results
|
|
139
139
|
SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 WHERE year >= 2023
|
|
140
140
|
|
|
141
|
+
-- Browse with pagination
|
|
142
|
+
SCROLL FROM notes LIMIT 10
|
|
143
|
+
|
|
141
144
|
-- List all collections
|
|
142
145
|
SHOW COLLECTIONS
|
|
146
|
+
|
|
147
|
+
-- Retrieve a point by ID
|
|
148
|
+
SELECT * FROM notes WHERE id = 1
|
|
143
149
|
```
|
|
144
150
|
|
|
145
151
|
---
|
|
@@ -147,7 +153,7 @@ SHOW COLLECTIONS
|
|
|
147
153
|
## Next Steps
|
|
148
154
|
|
|
149
155
|
- [INSERT / INSERT BULK](insert.md) — adding documents
|
|
150
|
-
- [SEARCH / RECOMMEND / Hybrid / RERANK](search.md) — querying
|
|
156
|
+
- [SEARCH / SELECT / SCROLL / RECOMMEND / Hybrid / RERANK](search.md) — querying
|
|
151
157
|
- [WHERE Filters](filters.md) — payload filtering
|
|
152
158
|
- [Collections & Quantization](collections.md) — managing collections
|
|
153
159
|
- [Scripts: EXECUTE / DUMP](scripts.md) — automating with script files
|
|
@@ -114,7 +114,7 @@
|
|
|
114
114
|
<a href="https://pypi.org/project/qql-cli/"><img src="https://img.shields.io/pypi/v/qql-cli?color=blue&label=PyPI" alt="PyPI version" /></a>
|
|
115
115
|
<a href="https://pypi.org/project/qql-cli/"><img src="https://img.shields.io/pypi/pyversions/qql-cli" alt="Python versions" /></a>
|
|
116
116
|
<a href="https://github.com/pavanjava/qql/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="MIT License" /></a>
|
|
117
|
-
<a href="https://github.com/pavanjava/qql/actions"><img src="https://img.shields.io/badge/tests-375%20passing-brightgreen" alt="375 tests" /></a>
|
|
117
|
+
<a href="https://github.com/pavanjava/qql/actions"><img src="https://img.shields.io/badge/tests-405%20passing-brightgreen" alt="405 tests" /></a>
|
|
118
118
|
</div>
|
|
119
119
|
|
|
120
120
|
<pre><span class="cmt"># Install</span>
|
|
@@ -148,8 +148,8 @@
|
|
|
148
148
|
<p>Adding documents, batch inserts, payload types</p>
|
|
149
149
|
</a>
|
|
150
150
|
<a class="card" href="search">
|
|
151
|
-
<h3>SEARCH / RECOMMEND</h3>
|
|
152
|
-
<p>Semantic search, hybrid search, reranking, recommendations</p>
|
|
151
|
+
<h3>SEARCH / SELECT / SCROLL / RECOMMEND</h3>
|
|
152
|
+
<p>Semantic search, point retrieval, pagination, hybrid search, reranking, recommendations</p>
|
|
153
153
|
</a>
|
|
154
154
|
<a class="card" href="filters">
|
|
155
155
|
<h3>WHERE Filters</h3>
|
|
@@ -40,6 +40,15 @@ result = run_query(
|
|
|
40
40
|
for hit in result.data:
|
|
41
41
|
print(hit["score"], hit["payload"])
|
|
42
42
|
|
|
43
|
+
# Scroll / pagination
|
|
44
|
+
result = run_query(
|
|
45
|
+
"SCROLL FROM notes LIMIT 2",
|
|
46
|
+
url="http://localhost:6333",
|
|
47
|
+
)
|
|
48
|
+
for point in result.data["points"]:
|
|
49
|
+
print(point["id"], point["payload"])
|
|
50
|
+
print(result.data["next_offset"])
|
|
51
|
+
|
|
43
52
|
# Bulk insert (all records embedded and upserted in one call)
|
|
44
53
|
result = run_query(
|
|
45
54
|
"""INSERT BULK INTO COLLECTION notes VALUES [
|
|
@@ -58,6 +67,13 @@ result = run_query(
|
|
|
58
67
|
for hit in result.data:
|
|
59
68
|
print(hit["score"], hit["payload"])
|
|
60
69
|
|
|
70
|
+
# Retrieve a point by ID
|
|
71
|
+
result = run_query(
|
|
72
|
+
"SELECT * FROM notes WHERE id = 1",
|
|
73
|
+
url="http://localhost:6333",
|
|
74
|
+
)
|
|
75
|
+
print(result.data) # {"id": "1", "payload": {...}}
|
|
76
|
+
|
|
61
77
|
# Delete by filter
|
|
62
78
|
result = run_query(
|
|
63
79
|
"DELETE FROM notes WHERE year < 2023",
|
|
@@ -111,7 +127,9 @@ class ExecutionResult:
|
|
|
111
127
|
| INSERT (dense) | `{"id": int \| "<uuid>", "collection": "<name>"}` |
|
|
112
128
|
| INSERT (hybrid) | `{"id": int \| "<uuid>", "collection": "<name>"}` |
|
|
113
129
|
| INSERT BULK | `None` (count in `result.message`) |
|
|
130
|
+
| SELECT | `{"id": str, "payload": dict}` or `None` when not found |
|
|
114
131
|
| SEARCH | `[{"id": str, "score": float, "payload": dict}, ...]` |
|
|
132
|
+
| SCROLL | `{"points": [{"id": str, "payload": dict}, ...], "next_offset": str \| None}` |
|
|
115
133
|
| RECOMMEND | `[{"id": str, "score": float, "payload": dict}, ...]` |
|
|
116
134
|
| SHOW COLLECTIONS | `["name1", "name2", ...]` |
|
|
117
135
|
| CREATE COLLECTION | `None` |
|
|
@@ -36,6 +36,9 @@ SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5'
|
|
|
36
36
|
-- Hybrid with custom dense model
|
|
37
37
|
SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
|
|
38
38
|
|
|
39
|
+
-- Hybrid with explicit fusion strategy
|
|
40
|
+
SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID FUSION 'dbsf'
|
|
41
|
+
|
|
39
42
|
-- Hybrid with both custom
|
|
40
43
|
SEARCH docs SIMILAR TO 'hello' LIMIT 5
|
|
41
44
|
USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' SPARSE MODEL 'prithivida/Splade_PP_en_v1'
|
|
@@ -159,7 +162,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
|
|
|
159
162
|
pytest tests/ -v
|
|
160
163
|
```
|
|
161
164
|
|
|
162
|
-
Expected output: **375 tests passing**.
|
|
165
|
+
Expected output: **405 tests passing**.
|
|
163
166
|
|
|
164
167
|
---
|
|
165
168
|
|
|
@@ -171,12 +174,14 @@ Expected output: **375 tests passing**.
|
|
|
171
174
|
| `Connection failed: ...` | Qdrant unreachable at given URL | Check that Qdrant is running and the URL is correct |
|
|
172
175
|
| `INSERT requires a 'text' field in VALUES` | `text` key missing from the VALUES dict | Add `'text': '...'` to your dict |
|
|
173
176
|
| `Vector dimension mismatch: collection '...' expects X dims, but model produces Y dims` | Model used in INSERT differs from the one used to create the collection | Use `USING MODEL` to specify the same model as the collection was created with |
|
|
174
|
-
| `Collection '...' does not exist` | SEARCH / DROP / DELETE on a non-existent collection | Check name spelling or run `SHOW COLLECTIONS` |
|
|
175
|
-
| `Unexpected token '...'; expected a QQL statement keyword` | Unrecognized statement | Check the query syntax |
|
|
177
|
+
| `Collection '...' does not exist` | SEARCH / SCROLL / SELECT / DROP / DELETE on a non-existent collection | Check name spelling or run `SHOW COLLECTIONS` |
|
|
178
|
+
| `Unexpected token '...'; expected a QQL statement keyword` | Unrecognized statement | Check the query syntax and supported statement list |
|
|
179
|
+
| `SELECT requires a string or integer point id, got '...'` | `SELECT` used with a non-ID filter value | Use `SELECT * FROM <collection> WHERE id = '<id>'` or an integer ID |
|
|
176
180
|
| `Unterminated string literal (at position N)` | A string is missing its closing quote | Close the string with a matching `'` or `"` |
|
|
177
181
|
| `Unexpected character '@' (at position N)` | A character not part of QQL syntax | Remove or quote the offending character |
|
|
178
182
|
| `Expected a filter operator after field '...'` | Unknown operator in WHERE clause | Use one of: `=`, `!=`, `>`, `>=`, `<`, `<=`, `IN`, `NOT IN`, `BETWEEN`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `MATCH` |
|
|
179
183
|
| `Expected ')' ...` | Unclosed parenthesis in WHERE clause | Add the missing `)` to close the group |
|
|
180
184
|
| `Qdrant error during SEARCH: ...` | Hybrid search on a non-hybrid collection, or wrong vector names | Ensure the collection was created with `HYBRID` before using `USING HYBRID` in INSERT/SEARCH |
|
|
185
|
+
| `Qdrant error during SCROLL: ...` | Qdrant rejected scroll request | Verify collection state, filter, and cursor (`AFTER`) value |
|
|
181
186
|
| `Unknown index type '...'` | Invalid schema type in CREATE INDEX | Use one of: `keyword`, `integer`, `float`, `bool`, `text`, `geo`, `datetime` |
|
|
182
187
|
| `Qdrant error during CREATE INDEX: ...` | Qdrant rejected the index creation | Check field name and collection state |
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SEARCH, RECOMMEND, Hybrid Search & Reranking
|
|
1
|
+
# SEARCH, SELECT, SCROLL, RECOMMEND, Hybrid Search & Reranking
|
|
2
2
|
|
|
3
3
|
---
|
|
4
4
|
|
|
@@ -14,7 +14,7 @@ SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n>
|
|
|
14
14
|
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING MODEL '<model_name>'
|
|
15
15
|
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING MODEL '<model>'] WHERE <filter>
|
|
16
16
|
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID
|
|
17
|
-
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
|
|
17
|
+
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
|
|
18
18
|
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING SPARSE [MODEL '<sparse_model>']
|
|
19
19
|
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> EXACT
|
|
20
20
|
SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false }
|
|
@@ -33,7 +33,7 @@ Search only papers published after 2020:
|
|
|
33
33
|
SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020
|
|
34
34
|
```
|
|
35
35
|
|
|
36
|
-
Hybrid search (combines dense semantic + sparse BM25 keyword retrieval via RRF):
|
|
36
|
+
Hybrid search (combines dense semantic + sparse BM25 keyword retrieval via RRF by default):
|
|
37
37
|
```sql
|
|
38
38
|
SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 USING HYBRID
|
|
39
39
|
```
|
|
@@ -70,6 +70,28 @@ Results are displayed as a table with three columns:
|
|
|
70
70
|
|
|
71
71
|
---
|
|
72
72
|
|
|
73
|
+
## SELECT — retrieve a point by ID
|
|
74
|
+
|
|
75
|
+
Fetches a single point payload by exact point ID.
|
|
76
|
+
|
|
77
|
+
**Syntax:**
|
|
78
|
+
```sql
|
|
79
|
+
SELECT * FROM <collection_name> WHERE id = '<point_id>'
|
|
80
|
+
SELECT * FROM <collection_name> WHERE id = <integer_id>
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**Examples:**
|
|
84
|
+
```sql
|
|
85
|
+
SELECT * FROM articles WHERE id = '3f2e1a4b-8c91-4d0e-b123-abc123def456'
|
|
86
|
+
SELECT * FROM articles WHERE id = 42
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
`SELECT` in this version is intentionally strict:
|
|
90
|
+
- only `*` projection is supported
|
|
91
|
+
- only `WHERE id = ...` is supported
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
73
95
|
## Query-Time Search Params (`EXACT`, `WITH`)
|
|
74
96
|
|
|
75
97
|
Use these when you want to debug retrieval quality or tune recall without changing collection-level settings.
|
|
@@ -98,15 +120,41 @@ SEARCH articles SIMILAR TO 'RAG' LIMIT 10 WHERE tag = 'li' WITH { acorn: true }
|
|
|
98
120
|
|
|
99
121
|
---
|
|
100
122
|
|
|
123
|
+
## SCROLL — pagination / browsing
|
|
124
|
+
|
|
125
|
+
Use `SCROLL` to iterate through points in a collection page by page.
|
|
126
|
+
|
|
127
|
+
**Syntax:**
|
|
128
|
+
```sql
|
|
129
|
+
SCROLL FROM <collection_name> LIMIT <n>
|
|
130
|
+
SCROLL FROM <collection_name> WHERE <filter> LIMIT <n>
|
|
131
|
+
SCROLL FROM <collection_name> AFTER '<point_id>' LIMIT <n>
|
|
132
|
+
SCROLL FROM <collection_name> WHERE <filter> AFTER <point_id> LIMIT <n>
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**Examples:**
|
|
136
|
+
```sql
|
|
137
|
+
SCROLL FROM articles LIMIT 50
|
|
138
|
+
SCROLL FROM articles WHERE year >= 2024 LIMIT 50
|
|
139
|
+
SCROLL FROM articles AFTER 'cursor-id' LIMIT 50
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**Behavior:**
|
|
143
|
+
- Returns points in ID order with payloads.
|
|
144
|
+
- Returns a `next_offset` cursor when more points are available.
|
|
145
|
+
- Use `AFTER <next_offset>` to fetch the next page.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
101
149
|
## Hybrid Search (USING HYBRID)
|
|
102
150
|
|
|
103
|
-
Hybrid search combines **dense semantic vectors** and **sparse BM25 keyword vectors** in a single query
|
|
151
|
+
Hybrid search combines **dense semantic vectors** and **sparse BM25 keyword vectors** in a single query. By default QQL merges the two result sets with Qdrant's **Reciprocal Rank Fusion (RRF)** algorithm, and you can optionally switch to **DBSF** with a `FUSION` clause.
|
|
104
152
|
|
|
105
153
|
### How it works internally
|
|
106
154
|
|
|
107
155
|
1. Both a dense vector (`TextEmbedding`) and a sparse BM25 vector (`SparseTextEmbedding`) are generated from your query text.
|
|
108
156
|
2. Qdrant fetches the top candidates from each index independently (`prefetch limit = LIMIT × 4`).
|
|
109
|
-
3. The two result lists are merged using
|
|
157
|
+
3. The two result lists are merged using the selected fusion strategy (`RRF` by default, or `DBSF` when requested).
|
|
110
158
|
4. The final top-N results are returned.
|
|
111
159
|
|
|
112
160
|
### Step 1: Create a hybrid collection
|
|
@@ -139,6 +187,9 @@ SEARCH articles SIMILAR TO 'transformer architecture' LIMIT 10 USING HYBRID
|
|
|
139
187
|
-- Hybrid search with a WHERE filter
|
|
140
188
|
SEARCH articles SIMILAR TO 'attention' LIMIT 10 USING HYBRID WHERE year >= 2017
|
|
141
189
|
|
|
190
|
+
-- Hybrid with DBSF fusion
|
|
191
|
+
SEARCH articles SIMILAR TO 'hybrid retrieval' LIMIT 10 USING HYBRID FUSION 'dbsf'
|
|
192
|
+
|
|
142
193
|
-- Hybrid with custom dense model
|
|
143
194
|
SEARCH articles SIMILAR TO 'embeddings' LIMIT 5
|
|
144
195
|
USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
|
|
@@ -154,6 +205,7 @@ SEARCH articles SIMILAR TO 'sparse retrieval' LIMIT 5
|
|
|
154
205
|
|---|---|
|
|
155
206
|
| Dense model | configured default (`sentence-transformers/all-MiniLM-L6-v2`) |
|
|
156
207
|
| Sparse model | `Qdrant/bm25` |
|
|
208
|
+
| Fusion | `rrf` |
|
|
157
209
|
|
|
158
210
|
### Dense vs. hybrid — when to use which
|
|
159
211
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "qql-cli"
|
|
3
|
-
version = "2.1.0"
|
|
3
|
+
version = "2.2.0"
|
|
4
4
|
description = "QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), WHERE clause filters, script execution, and collection dump/restore."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { file = "LICENSE" }
|
|
@@ -180,6 +180,20 @@ class ShowCollectionsStmt:
|
|
|
180
180
|
pass
|
|
181
181
|
|
|
182
182
|
|
|
183
|
+
@dataclass(frozen=True)
|
|
184
|
+
class SelectStmt:
|
|
185
|
+
collection: str
|
|
186
|
+
point_id: str | int
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@dataclass(frozen=True)
|
|
190
|
+
class ScrollStmt:
|
|
191
|
+
collection: str
|
|
192
|
+
limit: int
|
|
193
|
+
query_filter: FilterExpr | None = None
|
|
194
|
+
after: str | int | None = None
|
|
195
|
+
|
|
196
|
+
|
|
183
197
|
@dataclass(frozen=True)
|
|
184
198
|
class SearchStmt:
|
|
185
199
|
collection: str
|
|
@@ -187,6 +201,7 @@ class SearchStmt:
|
|
|
187
201
|
limit: int
|
|
188
202
|
model: str | None # dense model; None → use config default
|
|
189
203
|
hybrid: bool = False # if True, use prefetch+RRF hybrid search
|
|
204
|
+
fusion: str | None = None # hybrid fusion strategy; None → default rrf
|
|
190
205
|
sparse_only: bool = False # if True, query only the sparse vector (no dense)
|
|
191
206
|
sparse_model: str | None = None # sparse model for hybrid/sparse-only; None → SparseEmbedder.DEFAULT_MODEL
|
|
192
207
|
query_filter: FilterExpr | None = None # optional WHERE clause; default keeps existing tests valid
|
|
@@ -225,6 +240,8 @@ ASTNode = (
|
|
|
225
240
|
| CreateIndexStmt
|
|
226
241
|
| DropCollectionStmt
|
|
227
242
|
| ShowCollectionsStmt
|
|
243
|
+
| SelectStmt
|
|
244
|
+
| ScrollStmt
|
|
228
245
|
| SearchStmt
|
|
229
246
|
| RecommendStmt
|
|
230
247
|
| DeleteStmt
|
|
@@ -49,10 +49,18 @@ Available statements:
|
|
|
49
49
|
[yellow]SHOW COLLECTIONS[/yellow]
|
|
50
50
|
List all collections in the connected Qdrant instance.
|
|
51
51
|
|
|
52
|
+
[yellow]SCROLL FROM[/yellow] <name> [yellow]LIMIT[/yellow] <n>
|
|
53
|
+
Paginate points by ID order.
|
|
54
|
+
Optional: [yellow]WHERE[/yellow] <filter>
|
|
55
|
+
Optional: [yellow]AFTER[/yellow] '<id>'|<int>
|
|
56
|
+
|
|
57
|
+
[yellow]SELECT * FROM[/yellow] <name> [yellow]WHERE id =[/yellow] '<id>'|<int>
|
|
58
|
+
Retrieve a single point by its ID and return its payload.
|
|
59
|
+
|
|
52
60
|
[yellow]SEARCH[/yellow] <name> [yellow]SIMILAR TO[/yellow] '<text>' [yellow]LIMIT[/yellow] <n>
|
|
53
61
|
Semantic search by vector similarity.
|
|
54
62
|
Optional: [yellow]USING MODEL[/yellow] '<model>'
|
|
55
|
-
Optional: [yellow]USING HYBRID[/yellow] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>']
|
|
63
|
+
Optional: [yellow]USING HYBRID[/yellow] [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>']
|
|
56
64
|
Optional: [yellow]USING SPARSE[/yellow] [MODEL '<model>'] sparse-vector-only search
|
|
57
65
|
Optional: [yellow]WHERE[/yellow] <filter> (e.g. WHERE year > 2020 AND status = 'ok')
|
|
58
66
|
Optional: [yellow]RERANK[/yellow] [MODEL '<model>'] rerank results with a cross-encoder
|
|
@@ -400,5 +408,28 @@ def _run_and_print(executor: Executor, query: str) -> None:
|
|
|
400
408
|
console.print(table)
|
|
401
409
|
return
|
|
402
410
|
|
|
411
|
+
# Pretty-print scroll results
|
|
412
|
+
if isinstance(result.data, dict) and "points" in result.data and "next_offset" in result.data:
|
|
413
|
+
points = result.data["points"]
|
|
414
|
+
if points:
|
|
415
|
+
table = Table(show_header=True, header_style="bold cyan")
|
|
416
|
+
table.add_column("ID")
|
|
417
|
+
table.add_column("Payload")
|
|
418
|
+
for point in points:
|
|
419
|
+
table.add_row(point["id"], str(point["payload"]))
|
|
420
|
+
console.print(table)
|
|
421
|
+
if result.data["next_offset"] is not None:
|
|
422
|
+
console.print(f"[dim]next_offset: {result.data['next_offset']}[/dim]")
|
|
423
|
+
return
|
|
424
|
+
|
|
425
|
+
# Pretty-print SELECT result
|
|
426
|
+
if isinstance(result.data, dict) and "id" in result.data and "payload" in result.data:
|
|
427
|
+
table = Table(show_header=True, header_style="bold cyan")
|
|
428
|
+
table.add_column("ID")
|
|
429
|
+
table.add_column("Payload")
|
|
430
|
+
table.add_row(str(result.data["id"]), str(result.data["payload"]))
|
|
431
|
+
console.print(table)
|
|
432
|
+
return
|
|
433
|
+
|
|
403
434
|
# Fallback: print data as-is
|
|
404
435
|
console.print(result.data)
|
|
@@ -76,6 +76,8 @@ from .ast_nodes import (
|
|
|
76
76
|
QuantizationConfig,
|
|
77
77
|
QuantizationType,
|
|
78
78
|
RecommendStmt,
|
|
79
|
+
SelectStmt,
|
|
80
|
+
ScrollStmt,
|
|
79
81
|
SearchStmt,
|
|
80
82
|
SearchWith,
|
|
81
83
|
ShowCollectionsStmt,
|
|
@@ -115,6 +117,10 @@ class Executor:
|
|
|
115
117
|
return self._execute_drop(node)
|
|
116
118
|
if isinstance(node, ShowCollectionsStmt):
|
|
117
119
|
return self._execute_show(node)
|
|
120
|
+
if isinstance(node, ScrollStmt):
|
|
121
|
+
return self._execute_scroll(node)
|
|
122
|
+
if isinstance(node, SelectStmt):
|
|
123
|
+
return self._execute_select(node)
|
|
118
124
|
if isinstance(node, SearchStmt):
|
|
119
125
|
return self._execute_search(node)
|
|
120
126
|
if isinstance(node, RecommendStmt):
|
|
@@ -412,6 +418,65 @@ class Executor:
|
|
|
412
418
|
data=names,
|
|
413
419
|
)
|
|
414
420
|
|
|
421
|
+
def _execute_scroll(self, node: ScrollStmt) -> ExecutionResult:
|
|
422
|
+
if not self._client.collection_exists(node.collection):
|
|
423
|
+
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
|
|
424
|
+
|
|
425
|
+
scroll_filter: Filter | None = None
|
|
426
|
+
if node.query_filter is not None:
|
|
427
|
+
scroll_filter = self._wrap_as_filter(
|
|
428
|
+
self._build_qdrant_filter(node.query_filter)
|
|
429
|
+
)
|
|
430
|
+
|
|
431
|
+
try:
|
|
432
|
+
records, next_offset = self._client.scroll(
|
|
433
|
+
collection_name=node.collection,
|
|
434
|
+
scroll_filter=scroll_filter,
|
|
435
|
+
limit=node.limit,
|
|
436
|
+
offset=node.after,
|
|
437
|
+
with_payload=True,
|
|
438
|
+
with_vectors=False,
|
|
439
|
+
)
|
|
440
|
+
except UnexpectedResponse as e:
|
|
441
|
+
raise QQLRuntimeError(f"Qdrant error during SCROLL: {e}") from e
|
|
442
|
+
|
|
443
|
+
points = [
|
|
444
|
+
{"id": str(rec.id), "payload": rec.payload or {}}
|
|
445
|
+
for rec in records
|
|
446
|
+
]
|
|
447
|
+
return ExecutionResult(
|
|
448
|
+
success=True,
|
|
449
|
+
message=f"Scrolled {len(points)} point(s) from '{node.collection}'",
|
|
450
|
+
data={"points": points, "next_offset": None if next_offset is None else str(next_offset)},
|
|
451
|
+
)
|
|
452
|
+
|
|
453
|
+
def _execute_select(self, node: SelectStmt) -> ExecutionResult:
|
|
454
|
+
if not self._client.collection_exists(node.collection):
|
|
455
|
+
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
|
|
456
|
+
|
|
457
|
+
try:
|
|
458
|
+
records = self._client.retrieve(
|
|
459
|
+
collection_name=node.collection,
|
|
460
|
+
ids=[node.point_id],
|
|
461
|
+
with_payload=True,
|
|
462
|
+
with_vectors=False,
|
|
463
|
+
)
|
|
464
|
+
except UnexpectedResponse as e:
|
|
465
|
+
raise QQLRuntimeError(f"Qdrant error during SELECT: {e}") from e
|
|
466
|
+
|
|
467
|
+
if not records:
|
|
468
|
+
return ExecutionResult(
|
|
469
|
+
success=True,
|
|
470
|
+
message=f"Point '{node.point_id}' not found in '{node.collection}'",
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
record = records[0]
|
|
474
|
+
return ExecutionResult(
|
|
475
|
+
success=True,
|
|
476
|
+
message=f"Retrieved point '{node.point_id}' from '{node.collection}'",
|
|
477
|
+
data={"id": str(record.id), "payload": record.payload or {}},
|
|
478
|
+
)
|
|
479
|
+
|
|
415
480
|
def _execute_search(self, node: SearchStmt) -> ExecutionResult:
|
|
416
481
|
if not self._client.collection_exists(node.collection):
|
|
417
482
|
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
|
|
@@ -429,7 +494,7 @@ class Executor:
|
|
|
429
494
|
# enough material to reorder; only `node.limit` results are returned.
|
|
430
495
|
fetch_limit = node.limit * _RERANK_FETCH_MULTIPLIER if node.rerank else node.limit
|
|
431
496
|
|
|
432
|
-
# ── Hybrid SEARCH: prefetch dense+sparse, fuse with
|
|
497
|
+
# ── Hybrid SEARCH: prefetch dense+sparse, fuse with the requested strategy ──
|
|
433
498
|
if node.hybrid:
|
|
434
499
|
dense_model = node.model or self._config.default_model
|
|
435
500
|
sparse_model_name = node.sparse_model or SparseEmbedder.DEFAULT_MODEL
|
|
@@ -460,7 +525,7 @@ class Executor:
|
|
|
460
525
|
params=search_params,
|
|
461
526
|
),
|
|
462
527
|
],
|
|
463
|
-
query=FusionQuery(fusion=
|
|
528
|
+
query=FusionQuery(fusion=self._resolve_hybrid_fusion(node.fusion)),
|
|
464
529
|
limit=fetch_limit,
|
|
465
530
|
query_filter=qdrant_filter,
|
|
466
531
|
)
|
|
@@ -563,6 +628,15 @@ class Executor:
|
|
|
563
628
|
data=results,
|
|
564
629
|
)
|
|
565
630
|
|
|
631
|
+
def _resolve_hybrid_fusion(self, fusion: str | None) -> Fusion:
|
|
632
|
+
if fusion is None or fusion == "rrf":
|
|
633
|
+
return Fusion.RRF
|
|
634
|
+
if fusion == "dbsf":
|
|
635
|
+
return Fusion.DBSF
|
|
636
|
+
raise QQLRuntimeError(
|
|
637
|
+
f"Unsupported hybrid fusion '{fusion}'; expected 'rrf' or 'dbsf'"
|
|
638
|
+
)
|
|
639
|
+
|
|
566
640
|
def _execute_recommend(self, node: RecommendStmt) -> ExecutionResult:
|
|
567
641
|
if not self._client.collection_exists(node.collection):
|
|
568
642
|
raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
|
|
@@ -14,6 +14,7 @@ class TokenKind(Enum):
|
|
|
14
14
|
USING = auto()
|
|
15
15
|
MODEL = auto()
|
|
16
16
|
HYBRID = auto()
|
|
17
|
+
FUSION = auto()
|
|
17
18
|
DENSE = auto()
|
|
18
19
|
SPARSE = auto()
|
|
19
20
|
RERANK = auto()
|
|
@@ -34,7 +35,9 @@ class TokenKind(Enum):
|
|
|
34
35
|
ON = auto()
|
|
35
36
|
DROP = auto()
|
|
36
37
|
SHOW = auto()
|
|
38
|
+
SELECT = auto()
|
|
37
39
|
COLLECTIONS = auto()
|
|
40
|
+
SCROLL = auto()
|
|
38
41
|
SEARCH = auto()
|
|
39
42
|
RECOMMEND = auto()
|
|
40
43
|
POSITIVE = auto()
|
|
@@ -47,6 +50,7 @@ class TokenKind(Enum):
|
|
|
47
50
|
OFFSET = auto()
|
|
48
51
|
SCORE = auto()
|
|
49
52
|
THRESHOLD = auto()
|
|
53
|
+
AFTER = auto()
|
|
50
54
|
LOOKUP = auto()
|
|
51
55
|
VECTOR = auto()
|
|
52
56
|
DELETE = auto()
|
|
@@ -79,6 +83,7 @@ class TokenKind(Enum):
|
|
|
79
83
|
RBRACKET = auto()
|
|
80
84
|
LPAREN = auto()
|
|
81
85
|
RPAREN = auto()
|
|
86
|
+
STAR = auto()
|
|
82
87
|
COLON = auto()
|
|
83
88
|
COMMA = auto()
|
|
84
89
|
EQUALS = auto()
|
|
@@ -102,6 +107,7 @@ _KEYWORDS: dict[str, TokenKind] = {
|
|
|
102
107
|
"USING": TokenKind.USING,
|
|
103
108
|
"MODEL": TokenKind.MODEL,
|
|
104
109
|
"HYBRID": TokenKind.HYBRID,
|
|
110
|
+
"FUSION": TokenKind.FUSION,
|
|
105
111
|
"DENSE": TokenKind.DENSE,
|
|
106
112
|
"SPARSE": TokenKind.SPARSE,
|
|
107
113
|
"RERANK": TokenKind.RERANK,
|
|
@@ -122,7 +128,9 @@ _KEYWORDS: dict[str, TokenKind] = {
|
|
|
122
128
|
"ON": TokenKind.ON,
|
|
123
129
|
"DROP": TokenKind.DROP,
|
|
124
130
|
"SHOW": TokenKind.SHOW,
|
|
131
|
+
"SELECT": TokenKind.SELECT,
|
|
125
132
|
"COLLECTIONS": TokenKind.COLLECTIONS,
|
|
133
|
+
"SCROLL": TokenKind.SCROLL,
|
|
126
134
|
"SEARCH": TokenKind.SEARCH,
|
|
127
135
|
"RECOMMEND": TokenKind.RECOMMEND,
|
|
128
136
|
"POSITIVE": TokenKind.POSITIVE,
|
|
@@ -135,6 +143,7 @@ _KEYWORDS: dict[str, TokenKind] = {
|
|
|
135
143
|
"OFFSET": TokenKind.OFFSET,
|
|
136
144
|
"SCORE": TokenKind.SCORE,
|
|
137
145
|
"THRESHOLD": TokenKind.THRESHOLD,
|
|
146
|
+
"AFTER": TokenKind.AFTER,
|
|
138
147
|
"LOOKUP": TokenKind.LOOKUP,
|
|
139
148
|
"VECTOR": TokenKind.VECTOR,
|
|
140
149
|
"DELETE": TokenKind.DELETE,
|
|
@@ -197,6 +206,9 @@ class Lexer:
|
|
|
197
206
|
elif ch == ")":
|
|
198
207
|
tokens.append(Token(TokenKind.RPAREN, ")", i))
|
|
199
208
|
i += 1
|
|
209
|
+
elif ch == "*":
|
|
210
|
+
tokens.append(Token(TokenKind.STAR, "*", i))
|
|
211
|
+
i += 1
|
|
200
212
|
elif ch == ":":
|
|
201
213
|
tokens.append(Token(TokenKind.COLON, ":", i))
|
|
202
214
|
i += 1
|
|
@@ -26,6 +26,8 @@ from .ast_nodes import (
|
|
|
26
26
|
QuantizationConfig,
|
|
27
27
|
QuantizationType,
|
|
28
28
|
RecommendStmt,
|
|
29
|
+
SelectStmt,
|
|
30
|
+
ScrollStmt,
|
|
29
31
|
SearchStmt,
|
|
30
32
|
SearchWith,
|
|
31
33
|
ShowCollectionsStmt,
|
|
@@ -43,6 +45,8 @@ _CMP_OPS: dict[TokenKind, str] = {
|
|
|
43
45
|
TokenKind.LTE: "<=",
|
|
44
46
|
}
|
|
45
47
|
|
|
48
|
+
_HYBRID_FUSION_VALUES = {"rrf", "dbsf"}
|
|
49
|
+
|
|
46
50
|
|
|
47
51
|
class Parser:
|
|
48
52
|
def __init__(self, tokens: list[Token]) -> None:
|
|
@@ -61,6 +65,10 @@ class Parser:
|
|
|
61
65
|
node = self._parse_drop()
|
|
62
66
|
elif tok.kind == TokenKind.SHOW:
|
|
63
67
|
node = self._parse_show()
|
|
68
|
+
elif tok.kind == TokenKind.SCROLL:
|
|
69
|
+
node = self._parse_scroll()
|
|
70
|
+
elif tok.kind == TokenKind.SELECT:
|
|
71
|
+
node = self._parse_select()
|
|
64
72
|
elif tok.kind == TokenKind.SEARCH:
|
|
65
73
|
node = self._parse_search()
|
|
66
74
|
elif tok.kind == TokenKind.RECOMMEND:
|
|
@@ -288,6 +296,43 @@ class Parser:
|
|
|
288
296
|
self._expect(TokenKind.COLLECTIONS)
|
|
289
297
|
return ShowCollectionsStmt()
|
|
290
298
|
|
|
299
|
+
def _parse_scroll(self) -> ScrollStmt:
|
|
300
|
+
self._expect(TokenKind.SCROLL)
|
|
301
|
+
self._expect(TokenKind.FROM)
|
|
302
|
+
collection = self._parse_identifier()
|
|
303
|
+
|
|
304
|
+
query_filter: FilterExpr | None = None
|
|
305
|
+
after: str | int | None = None
|
|
306
|
+
|
|
307
|
+
if self._peek().kind == TokenKind.WHERE:
|
|
308
|
+
self._advance()
|
|
309
|
+
query_filter = self._parse_filter_expr()
|
|
310
|
+
|
|
311
|
+
if self._peek().kind == TokenKind.AFTER:
|
|
312
|
+
self._advance()
|
|
313
|
+
after = self._parse_point_id_value("SCROLL AFTER")
|
|
314
|
+
|
|
315
|
+
self._expect(TokenKind.LIMIT)
|
|
316
|
+
limit = int(self._expect(TokenKind.INTEGER).value)
|
|
317
|
+
|
|
318
|
+
return ScrollStmt(
|
|
319
|
+
collection=collection,
|
|
320
|
+
limit=limit,
|
|
321
|
+
query_filter=query_filter,
|
|
322
|
+
after=after,
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
def _parse_select(self) -> SelectStmt:
|
|
326
|
+
self._expect(TokenKind.SELECT)
|
|
327
|
+
self._expect(TokenKind.STAR)
|
|
328
|
+
self._expect(TokenKind.FROM)
|
|
329
|
+
collection = self._parse_identifier()
|
|
330
|
+
self._expect(TokenKind.WHERE)
|
|
331
|
+
self._expect(TokenKind.ID)
|
|
332
|
+
self._expect(TokenKind.EQUALS)
|
|
333
|
+
point_id = self._parse_point_id_value("SELECT")
|
|
334
|
+
return SelectStmt(collection=collection, point_id=point_id)
|
|
335
|
+
|
|
291
336
|
def _parse_search(self) -> SearchStmt:
|
|
292
337
|
self._expect(TokenKind.SEARCH)
|
|
293
338
|
collection = self._parse_identifier()
|
|
@@ -304,6 +349,7 @@ class Parser:
|
|
|
304
349
|
|
|
305
350
|
model: str | None = None
|
|
306
351
|
hybrid: bool = False
|
|
352
|
+
fusion: str | None = None
|
|
307
353
|
sparse_only: bool = False
|
|
308
354
|
sparse_model: str | None = None
|
|
309
355
|
if self._peek().kind == TokenKind.USING:
|
|
@@ -311,9 +357,18 @@ class Parser:
|
|
|
311
357
|
if self._peek().kind == TokenKind.HYBRID:
|
|
312
358
|
self._advance() # consume HYBRID
|
|
313
359
|
hybrid = True
|
|
314
|
-
# Optional DENSE MODEL
|
|
315
|
-
while self._peek().kind in (TokenKind.DENSE, TokenKind.SPARSE):
|
|
360
|
+
# Optional FUSION / DENSE MODEL / SPARSE MODEL sub-clauses, any order.
|
|
361
|
+
while self._peek().kind in (TokenKind.FUSION, TokenKind.DENSE, TokenKind.SPARSE):
|
|
316
362
|
sub = self._advance()
|
|
363
|
+
if sub.kind == TokenKind.FUSION:
|
|
364
|
+
value_tok = self._expect(TokenKind.STRING)
|
|
365
|
+
fusion = value_tok.value.lower()
|
|
366
|
+
if fusion not in _HYBRID_FUSION_VALUES:
|
|
367
|
+
raise QQLSyntaxError(
|
|
368
|
+
f"Unsupported hybrid fusion '{value_tok.value}'; expected 'rrf' or 'dbsf'",
|
|
369
|
+
value_tok.pos,
|
|
370
|
+
)
|
|
371
|
+
continue
|
|
317
372
|
self._expect(TokenKind.MODEL)
|
|
318
373
|
m = self._expect(TokenKind.STRING).value
|
|
319
374
|
if sub.kind == TokenKind.DENSE:
|
|
@@ -368,6 +423,7 @@ class Parser:
|
|
|
368
423
|
limit=limit,
|
|
369
424
|
model=model,
|
|
370
425
|
hybrid=hybrid,
|
|
426
|
+
fusion=fusion,
|
|
371
427
|
sparse_only=sparse_only,
|
|
372
428
|
sparse_model=sparse_model,
|
|
373
429
|
query_filter=query_filter,
|
|
@@ -457,17 +513,7 @@ class Parser:
|
|
|
457
513
|
if self._peek().kind == TokenKind.ID:
|
|
458
514
|
self._advance()
|
|
459
515
|
self._expect(TokenKind.EQUALS)
|
|
460
|
-
|
|
461
|
-
if tok.kind == TokenKind.STRING:
|
|
462
|
-
self._advance()
|
|
463
|
-
point_id: str | int = tok.value
|
|
464
|
-
elif tok.kind == TokenKind.INTEGER:
|
|
465
|
-
self._advance()
|
|
466
|
-
point_id = int(tok.value)
|
|
467
|
-
else:
|
|
468
|
-
raise QQLSyntaxError(
|
|
469
|
-
f"Expected string or integer for point id, got '{tok.value}'", tok.pos
|
|
470
|
-
)
|
|
516
|
+
point_id = self._parse_point_id_value("DELETE")
|
|
471
517
|
return DeleteStmt(collection=collection, point_id=point_id)
|
|
472
518
|
|
|
473
519
|
query_filter = self._parse_filter_expr()
|
|
@@ -694,6 +740,19 @@ class Parser:
|
|
|
694
740
|
self._expect(TokenKind.RPAREN)
|
|
695
741
|
return tuple(items)
|
|
696
742
|
|
|
743
|
+
def _parse_point_id_value(self, statement: str) -> str | int:
|
|
744
|
+
tok = self._peek()
|
|
745
|
+
if tok.kind == TokenKind.STRING:
|
|
746
|
+
self._advance()
|
|
747
|
+
return tok.value
|
|
748
|
+
if tok.kind == TokenKind.INTEGER:
|
|
749
|
+
self._advance()
|
|
750
|
+
return int(tok.value)
|
|
751
|
+
raise QQLSyntaxError(
|
|
752
|
+
f"{statement} requires a string or integer point id, got '{tok.value}'",
|
|
753
|
+
tok.pos,
|
|
754
|
+
)
|
|
755
|
+
|
|
697
756
|
# ── Dict / value parsers (for INSERT VALUES) ──────────────────────────
|
|
698
757
|
|
|
699
758
|
def _parse_identifier(self) -> str:
|
|
@@ -24,6 +24,8 @@ _STMT_STARTERS = {
|
|
|
24
24
|
TokenKind.CREATE,
|
|
25
25
|
TokenKind.DROP,
|
|
26
26
|
TokenKind.SHOW,
|
|
27
|
+
TokenKind.SELECT,
|
|
28
|
+
TokenKind.SCROLL,
|
|
27
29
|
TokenKind.SEARCH,
|
|
28
30
|
TokenKind.RECOMMEND,
|
|
29
31
|
TokenKind.DELETE,
|
|
@@ -54,7 +56,7 @@ def split_statements(tokens: list[Token]) -> list[list[Token]]:
|
|
|
54
56
|
"""Split a flat token list into per-statement chunks.
|
|
55
57
|
|
|
56
58
|
A new chunk begins whenever a statement-starter keyword (INSERT, CREATE,
|
|
57
|
-
DROP, SHOW, SEARCH, RECOMMEND, DELETE) is encountered at
|
|
59
|
+
DROP, SHOW, SCROLL, SELECT, SEARCH, RECOMMEND, DELETE) is encountered at
|
|
58
60
|
brace/bracket/paren depth 0.
|
|
59
61
|
The EOF sentinel is consumed and never included in any chunk.
|
|
60
62
|
"""
|
|
@@ -10,6 +10,8 @@ from qql.ast_nodes import (
|
|
|
10
10
|
QuantizationConfig,
|
|
11
11
|
QuantizationType,
|
|
12
12
|
RecommendStmt,
|
|
13
|
+
SelectStmt,
|
|
14
|
+
ScrollStmt,
|
|
13
15
|
SearchStmt,
|
|
14
16
|
SearchWith,
|
|
15
17
|
ShowCollectionsStmt,
|
|
@@ -357,6 +359,101 @@ class TestShow:
|
|
|
357
359
|
assert "docs" in result.data
|
|
358
360
|
|
|
359
361
|
|
|
362
|
+
class TestScroll:
|
|
363
|
+
def test_scroll_returns_points_and_next_offset(self, executor, mock_client, mocker):
|
|
364
|
+
mock_client.collection_exists.return_value = True
|
|
365
|
+
rec1 = mocker.MagicMock()
|
|
366
|
+
rec1.id = "a"
|
|
367
|
+
rec1.payload = {"text": "first"}
|
|
368
|
+
rec2 = mocker.MagicMock()
|
|
369
|
+
rec2.id = 2
|
|
370
|
+
rec2.payload = {"text": "second"}
|
|
371
|
+
mock_client.scroll.return_value = ([rec1, rec2], "next-1")
|
|
372
|
+
|
|
373
|
+
node = ScrollStmt(collection="notes", limit=2)
|
|
374
|
+
result = executor.execute(node)
|
|
375
|
+
|
|
376
|
+
mock_client.scroll.assert_called_once_with(
|
|
377
|
+
collection_name="notes",
|
|
378
|
+
scroll_filter=None,
|
|
379
|
+
limit=2,
|
|
380
|
+
offset=None,
|
|
381
|
+
with_payload=True,
|
|
382
|
+
with_vectors=False,
|
|
383
|
+
)
|
|
384
|
+
assert result.success is True
|
|
385
|
+
assert result.data == {
|
|
386
|
+
"points": [
|
|
387
|
+
{"id": "a", "payload": {"text": "first"}},
|
|
388
|
+
{"id": "2", "payload": {"text": "second"}},
|
|
389
|
+
],
|
|
390
|
+
"next_offset": "next-1",
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
def test_scroll_with_after_and_filter(self, executor, mock_client, mocker):
|
|
394
|
+
from qql.ast_nodes import CompareExpr
|
|
395
|
+
from qdrant_client.models import Filter
|
|
396
|
+
|
|
397
|
+
mock_client.collection_exists.return_value = True
|
|
398
|
+
mock_client.scroll.return_value = ([], None)
|
|
399
|
+
|
|
400
|
+
node = ScrollStmt(
|
|
401
|
+
collection="notes",
|
|
402
|
+
limit=10,
|
|
403
|
+
after="cursor-id",
|
|
404
|
+
query_filter=CompareExpr(field="year", op=">=", value=2024),
|
|
405
|
+
)
|
|
406
|
+
executor.execute(node)
|
|
407
|
+
|
|
408
|
+
kwargs = mock_client.scroll.call_args.kwargs
|
|
409
|
+
assert kwargs["offset"] == "cursor-id"
|
|
410
|
+
assert isinstance(kwargs["scroll_filter"], Filter)
|
|
411
|
+
|
|
412
|
+
def test_scroll_nonexistent_collection_raises(self, executor, mock_client):
|
|
413
|
+
mock_client.collection_exists.return_value = False
|
|
414
|
+
node = ScrollStmt(collection="ghost", limit=5)
|
|
415
|
+
with pytest.raises(QQLRuntimeError, match="does not exist"):
|
|
416
|
+
executor.execute(node)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
class TestSelect:
|
|
420
|
+
def test_select_by_id_returns_payload(self, executor, mock_client, mocker):
|
|
421
|
+
mock_client.collection_exists.return_value = True
|
|
422
|
+
rec = mocker.MagicMock()
|
|
423
|
+
rec.id = "abc-123"
|
|
424
|
+
rec.payload = {"text": "hello", "year": 2024}
|
|
425
|
+
mock_client.retrieve.return_value = [rec]
|
|
426
|
+
|
|
427
|
+
node = SelectStmt(collection="notes", point_id="abc-123")
|
|
428
|
+
result = executor.execute(node)
|
|
429
|
+
|
|
430
|
+
mock_client.retrieve.assert_called_once_with(
|
|
431
|
+
collection_name="notes",
|
|
432
|
+
ids=["abc-123"],
|
|
433
|
+
with_payload=True,
|
|
434
|
+
with_vectors=False,
|
|
435
|
+
)
|
|
436
|
+
assert result.success is True
|
|
437
|
+
assert result.data == {"id": "abc-123", "payload": {"text": "hello", "year": 2024}}
|
|
438
|
+
|
|
439
|
+
def test_select_not_found(self, executor, mock_client):
|
|
440
|
+
mock_client.collection_exists.return_value = True
|
|
441
|
+
mock_client.retrieve.return_value = []
|
|
442
|
+
|
|
443
|
+
node = SelectStmt(collection="notes", point_id=7)
|
|
444
|
+
result = executor.execute(node)
|
|
445
|
+
|
|
446
|
+
assert result.success is True
|
|
447
|
+
assert "not found" in result.message
|
|
448
|
+
assert result.data is None
|
|
449
|
+
|
|
450
|
+
def test_select_nonexistent_collection_raises(self, executor, mock_client):
|
|
451
|
+
mock_client.collection_exists.return_value = False
|
|
452
|
+
node = SelectStmt(collection="ghost", point_id="x")
|
|
453
|
+
with pytest.raises(QQLRuntimeError, match="does not exist"):
|
|
454
|
+
executor.execute(node)
|
|
455
|
+
|
|
456
|
+
|
|
360
457
|
class TestSearch:
|
|
361
458
|
def test_search_calls_qdrant_query_points(self, executor, mock_client, mocker):
|
|
362
459
|
mock_client.collection_exists.return_value = True
|
|
@@ -1063,6 +1160,29 @@ class TestHybridSearch:
|
|
|
1063
1160
|
assert isinstance(kw["query"], FusionQuery)
|
|
1064
1161
|
assert kw["query"].fusion == Fusion.RRF
|
|
1065
1162
|
|
|
1163
|
+
def test_hybrid_search_uses_dbsf_fusion(
|
|
1164
|
+
self, executor, mock_client, mock_sparse_embedder, mocker
|
|
1165
|
+
):
|
|
1166
|
+
from qdrant_client.models import Fusion, FusionQuery
|
|
1167
|
+
|
|
1168
|
+
mock_client.collection_exists.return_value = True
|
|
1169
|
+
mock_resp = mocker.MagicMock()
|
|
1170
|
+
mock_resp.points = []
|
|
1171
|
+
mock_client.query_points.return_value = mock_resp
|
|
1172
|
+
|
|
1173
|
+
node = SearchStmt(
|
|
1174
|
+
collection="col",
|
|
1175
|
+
query_text="q",
|
|
1176
|
+
limit=5,
|
|
1177
|
+
model=None,
|
|
1178
|
+
hybrid=True,
|
|
1179
|
+
fusion="dbsf",
|
|
1180
|
+
)
|
|
1181
|
+
executor.execute(node)
|
|
1182
|
+
kw = mock_client.query_points.call_args.kwargs
|
|
1183
|
+
assert isinstance(kw["query"], FusionQuery)
|
|
1184
|
+
assert kw["query"].fusion == Fusion.DBSF
|
|
1185
|
+
|
|
1066
1186
|
def test_hybrid_search_prefetch_limit_is_4x(
|
|
1067
1187
|
self, executor, mock_client, mock_sparse_embedder, mocker
|
|
1068
1188
|
):
|
|
@@ -39,6 +39,20 @@ class TestKeywords:
|
|
|
39
39
|
assert ks[3] == TokenKind.TO
|
|
40
40
|
assert ks[5] == TokenKind.LIMIT
|
|
41
41
|
|
|
42
|
+
def test_scroll_keywords(self):
|
|
43
|
+
ks = kinds("SCROLL FROM docs AFTER 'cursor-id' LIMIT 50")
|
|
44
|
+
assert ks[0] == TokenKind.SCROLL
|
|
45
|
+
assert ks[1] == TokenKind.FROM
|
|
46
|
+
assert TokenKind.AFTER in ks
|
|
47
|
+
assert TokenKind.LIMIT in ks
|
|
48
|
+
|
|
49
|
+
def test_select_keywords(self):
|
|
50
|
+
ks = kinds("SELECT * FROM notes WHERE id = 'abc'")
|
|
51
|
+
assert ks[0] == TokenKind.SELECT
|
|
52
|
+
assert ks[1] == TokenKind.STAR
|
|
53
|
+
assert ks[2] == TokenKind.FROM
|
|
54
|
+
assert ks[4] == TokenKind.WHERE
|
|
55
|
+
|
|
42
56
|
def test_delete_keywords(self):
|
|
43
57
|
ks = kinds("DELETE FROM foo WHERE id = 'abc'")
|
|
44
58
|
assert ks[:4] == [TokenKind.DELETE, TokenKind.FROM, TokenKind.IDENTIFIER, TokenKind.WHERE]
|
|
@@ -89,6 +103,10 @@ class TestPunctuation:
|
|
|
89
103
|
assert ks[0] == TokenKind.LBRACKET
|
|
90
104
|
assert ks[-2] == TokenKind.RBRACKET
|
|
91
105
|
|
|
106
|
+
def test_star(self):
|
|
107
|
+
ks = kinds("*")
|
|
108
|
+
assert ks[0] == TokenKind.STAR
|
|
109
|
+
|
|
92
110
|
|
|
93
111
|
class TestErrors:
|
|
94
112
|
def test_unterminated_string(self):
|
|
@@ -212,6 +230,10 @@ class TestHybridKeyword:
|
|
|
212
230
|
ks = kinds("sparse")
|
|
213
231
|
assert ks[0] == TokenKind.SPARSE
|
|
214
232
|
|
|
233
|
+
def test_fusion_keyword(self):
|
|
234
|
+
ks = kinds("FUSION")
|
|
235
|
+
assert ks[0] == TokenKind.FUSION
|
|
236
|
+
|
|
215
237
|
def test_hybrid_in_create_statement(self):
|
|
216
238
|
ks = kinds("CREATE COLLECTION articles HYBRID")
|
|
217
239
|
assert ks[3] == TokenKind.HYBRID
|
|
@@ -24,6 +24,8 @@ from qql.ast_nodes import (
|
|
|
24
24
|
QuantizationConfig,
|
|
25
25
|
QuantizationType,
|
|
26
26
|
RecommendStmt,
|
|
27
|
+
SelectStmt,
|
|
28
|
+
ScrollStmt,
|
|
27
29
|
SearchStmt,
|
|
28
30
|
SearchWith,
|
|
29
31
|
ShowCollectionsStmt,
|
|
@@ -189,6 +191,51 @@ class TestShow:
|
|
|
189
191
|
assert isinstance(node, ShowCollectionsStmt)
|
|
190
192
|
|
|
191
193
|
|
|
194
|
+
class TestScroll:
|
|
195
|
+
def test_scroll_basic(self):
|
|
196
|
+
node = parse("SCROLL FROM docs LIMIT 50")
|
|
197
|
+
assert isinstance(node, ScrollStmt)
|
|
198
|
+
assert node.collection == "docs"
|
|
199
|
+
assert node.limit == 50
|
|
200
|
+
assert node.query_filter is None
|
|
201
|
+
assert node.after is None
|
|
202
|
+
|
|
203
|
+
def test_scroll_with_where(self):
|
|
204
|
+
node = parse("SCROLL FROM docs WHERE year >= 2024 LIMIT 50")
|
|
205
|
+
assert isinstance(node, ScrollStmt)
|
|
206
|
+
assert isinstance(node.query_filter, CompareExpr)
|
|
207
|
+
assert node.query_filter.field == "year"
|
|
208
|
+
assert node.after is None
|
|
209
|
+
|
|
210
|
+
def test_scroll_with_after(self):
|
|
211
|
+
node = parse("SCROLL FROM docs AFTER 'cursor-id' LIMIT 50")
|
|
212
|
+
assert isinstance(node, ScrollStmt)
|
|
213
|
+
assert node.after == "cursor-id"
|
|
214
|
+
|
|
215
|
+
def test_scroll_with_where_and_after(self):
|
|
216
|
+
node = parse("SCROLL FROM docs WHERE year >= 2024 AFTER 42 LIMIT 50")
|
|
217
|
+
assert isinstance(node, ScrollStmt)
|
|
218
|
+
assert node.after == 42
|
|
219
|
+
assert isinstance(node.query_filter, CompareExpr)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class TestSelect:
|
|
223
|
+
def test_select_by_string_id(self):
|
|
224
|
+
node = parse("SELECT * FROM notes WHERE id = 'abc-123'")
|
|
225
|
+
assert isinstance(node, SelectStmt)
|
|
226
|
+
assert node.collection == "notes"
|
|
227
|
+
assert node.point_id == "abc-123"
|
|
228
|
+
|
|
229
|
+
def test_select_by_integer_id(self):
|
|
230
|
+
node = parse("SELECT * FROM notes WHERE id = 42")
|
|
231
|
+
assert isinstance(node, SelectStmt)
|
|
232
|
+
assert node.point_id == 42
|
|
233
|
+
|
|
234
|
+
def test_select_requires_id_filter(self):
|
|
235
|
+
with pytest.raises(QQLSyntaxError):
|
|
236
|
+
parse("SELECT * FROM notes WHERE year = 2024")
|
|
237
|
+
|
|
238
|
+
|
|
192
239
|
class TestSearch:
|
|
193
240
|
def test_basic_search(self):
|
|
194
241
|
node = parse("SEARCH notes SIMILAR TO 'hello world' LIMIT 5")
|
|
@@ -334,7 +381,7 @@ class TestRecommend:
|
|
|
334
381
|
class TestErrors:
|
|
335
382
|
def test_unknown_keyword(self):
|
|
336
383
|
with pytest.raises(QQLSyntaxError):
|
|
337
|
-
parse("
|
|
384
|
+
parse("UPSERT INTO foo VALUES {'text': 'x'}")
|
|
338
385
|
|
|
339
386
|
def test_missing_collection_name(self):
|
|
340
387
|
with pytest.raises(QQLSyntaxError):
|
|
@@ -704,6 +751,24 @@ class TestHybridSearch:
|
|
|
704
751
|
assert isinstance(node.query_filter, CompareExpr)
|
|
705
752
|
assert node.query_filter.field == "year"
|
|
706
753
|
|
|
754
|
+
def test_search_hybrid_with_dbsf_fusion(self):
|
|
755
|
+
node = parse(
|
|
756
|
+
"SEARCH docs SIMILAR TO 'q' LIMIT 10 USING HYBRID FUSION 'dbsf'"
|
|
757
|
+
)
|
|
758
|
+
assert node.hybrid is True
|
|
759
|
+
assert node.fusion == "dbsf"
|
|
760
|
+
|
|
761
|
+
def test_search_hybrid_with_fusion_and_models(self):
|
|
762
|
+
node = parse(
|
|
763
|
+
"SEARCH docs SIMILAR TO 'q' LIMIT 10 "
|
|
764
|
+
"USING HYBRID FUSION 'rrf' SPARSE MODEL 'Qdrant/bm25' "
|
|
765
|
+
"DENSE MODEL 'BAAI/bge-base-en-v1.5'"
|
|
766
|
+
)
|
|
767
|
+
assert node.hybrid is True
|
|
768
|
+
assert node.fusion == "rrf"
|
|
769
|
+
assert node.sparse_model == "Qdrant/bm25"
|
|
770
|
+
assert node.model == "BAAI/bge-base-en-v1.5"
|
|
771
|
+
|
|
707
772
|
def test_search_hybrid_dense_model_and_where(self):
|
|
708
773
|
node = parse(
|
|
709
774
|
"SEARCH articles SIMILAR TO 'ml' LIMIT 10 "
|
|
@@ -713,6 +778,10 @@ class TestHybridSearch:
|
|
|
713
778
|
assert node.model == "BAAI/bge-small-en-v1.5"
|
|
714
779
|
assert isinstance(node.query_filter, CompareExpr)
|
|
715
780
|
|
|
781
|
+
def test_search_hybrid_rejects_unknown_fusion(self):
|
|
782
|
+
with pytest.raises(QQLSyntaxError, match="Unsupported hybrid fusion"):
|
|
783
|
+
parse("SEARCH docs SIMILAR TO 'q' LIMIT 10 USING HYBRID FUSION 'x'")
|
|
784
|
+
|
|
716
785
|
def test_search_hybrid_limit_preserved(self):
|
|
717
786
|
node = parse("SEARCH col SIMILAR TO 'q' LIMIT 7 USING HYBRID")
|
|
718
787
|
assert node.limit == 7
|
|
@@ -111,6 +111,30 @@ class TestSplitStatements:
|
|
|
111
111
|
assert len(chunks) == 3
|
|
112
112
|
assert chunks[1][0].kind == TokenKind.RECOMMEND
|
|
113
113
|
|
|
114
|
+
def test_scroll_starts_new_top_level_statement(self):
|
|
115
|
+
from qql.lexer import TokenKind
|
|
116
|
+
|
|
117
|
+
tokens = tokenize(
|
|
118
|
+
"SHOW COLLECTIONS\n"
|
|
119
|
+
"SCROLL FROM x LIMIT 10\n"
|
|
120
|
+
"DROP COLLECTION x"
|
|
121
|
+
)
|
|
122
|
+
chunks = split_statements(tokens)
|
|
123
|
+
assert len(chunks) == 3
|
|
124
|
+
assert chunks[1][0].kind == TokenKind.SCROLL
|
|
125
|
+
|
|
126
|
+
def test_select_starts_new_top_level_statement(self):
|
|
127
|
+
from qql.lexer import TokenKind
|
|
128
|
+
|
|
129
|
+
tokens = tokenize(
|
|
130
|
+
"SHOW COLLECTIONS\n"
|
|
131
|
+
"SELECT * FROM x WHERE id = 'id-1'\n"
|
|
132
|
+
"DROP COLLECTION x"
|
|
133
|
+
)
|
|
134
|
+
chunks = split_statements(tokens)
|
|
135
|
+
assert len(chunks) == 3
|
|
136
|
+
assert chunks[1][0].kind == TokenKind.SELECT
|
|
137
|
+
|
|
114
138
|
|
|
115
139
|
# ── run_script ────────────────────────────────────────────────────────────────
|
|
116
140
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|