qql-cli 2.1.0__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. {qql_cli-2.1.0 → qql_cli-2.2.0}/PKG-INFO +15 -6
  2. {qql_cli-2.1.0 → qql_cli-2.2.0}/README.md +14 -5
  3. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/getting-started.md +8 -2
  4. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/index.html +3 -3
  5. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/programmatic.md +18 -0
  6. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/reference.md +8 -3
  7. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/search.md +57 -5
  8. {qql_cli-2.1.0 → qql_cli-2.2.0}/pyproject.toml +1 -1
  9. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/ast_nodes.py +17 -0
  10. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/cli.py +32 -1
  11. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/executor.py +76 -2
  12. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/lexer.py +12 -0
  13. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/parser.py +72 -13
  14. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/script.py +3 -1
  15. {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_executor.py +120 -0
  16. {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_lexer.py +22 -0
  17. {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_parser.py +70 -1
  18. {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_script.py +24 -0
  19. {qql_cli-2.1.0 → qql_cli-2.2.0}/.github/workflows/ci.yml +0 -0
  20. {qql_cli-2.1.0 → qql_cli-2.2.0}/.github/workflows/publish.yml +0 -0
  21. {qql_cli-2.1.0 → qql_cli-2.2.0}/.gitignore +0 -0
  22. {qql_cli-2.1.0 → qql_cli-2.2.0}/LICENSE +0 -0
  23. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/_config.yml +0 -0
  24. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/collections.md +0 -0
  25. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/filters.md +0 -0
  26. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/insert.md +0 -0
  27. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/robots.txt +0 -0
  28. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/scripts.md +0 -0
  29. {qql_cli-2.1.0 → qql_cli-2.2.0}/docs/sitemap.xml +0 -0
  30. {qql_cli-2.1.0 → qql_cli-2.2.0}/main.py +0 -0
  31. {qql_cli-2.1.0 → qql_cli-2.2.0}/resources/Features.md +0 -0
  32. {qql_cli-2.1.0 → qql_cli-2.2.0}/resources/sample.qql +0 -0
  33. {qql_cli-2.1.0 → qql_cli-2.2.0}/resources/sample_v2.qql +0 -0
  34. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/__init__.py +0 -0
  35. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/config.py +0 -0
  36. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/dumper.py +0 -0
  37. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/embedder.py +0 -0
  38. {qql_cli-2.1.0 → qql_cli-2.2.0}/src/qql/exceptions.py +0 -0
  39. {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/__init__.py +0 -0
  40. {qql_cli-2.1.0 → qql_cli-2.2.0}/tests/test_dumper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: qql-cli
3
- Version: 2.1.0
3
+ Version: 2.2.0
4
4
  Summary: QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), WHERE clause filters, script execution, and collection dump/restore.
5
5
  Project-URL: Homepage, https://github.com/pavanjava/qql
6
6
  Project-URL: Repository, https://github.com/pavanjava/qql
@@ -56,9 +56,9 @@ Description-Content-Type: text/markdown
56
56
  [![PyPI version](https://img.shields.io/pypi/v/qql-cli?color=blue&label=PyPI)](https://pypi.org/project/qql-cli/)
57
57
  [![Python 3.12+](https://img.shields.io/pypi/pyversions/qql-cli)](https://pypi.org/project/qql-cli/)
58
58
  [![MIT License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
59
- [![Tests](https://img.shields.io/badge/tests-375%20passing-brightgreen)](tests/)
59
+ [![Tests](https://img.shields.io/badge/tests-405%20passing-brightgreen)](tests/)
60
60
 
61
- Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
61
+ Write `INSERT`, `SELECT`, `SEARCH`, `SCROLL`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
62
62
 
63
63
  ```
64
64
  qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
@@ -99,7 +99,7 @@ Your query string
99
99
  Qdrant instance
100
100
  ```
101
101
 
102
- When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) to merge the results of both retrieval methods.
102
+ When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) by default to merge the results of both retrieval methods. You can switch hybrid search to DBSF with `FUSION 'dbsf'`.
103
103
 
104
104
  ---
105
105
 
@@ -133,7 +133,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith
133
133
  |---|---|
134
134
  | [Getting Started](docs/getting-started.md) | Installation, connecting, first queries |
135
135
  | [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types |
136
- | [SEARCH / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, hybrid, reranking, recommendations |
136
+ | [SEARCH / SELECT / SCROLL / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, point retrieval, pagination, hybrid, reranking, recommendations |
137
137
  | [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
138
138
  | [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX |
139
139
  | [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
@@ -153,11 +153,20 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}]
153
153
  SEARCH articles SIMILAR TO 'query' LIMIT 10
154
154
  SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020
155
155
  SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID
156
+ SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf'
156
157
  SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID RERANK
157
158
 
159
+ -- Scroll
160
+ SCROLL FROM articles LIMIT 50
161
+ SCROLL FROM articles WHERE year >= 2024 LIMIT 50
162
+ SCROLL FROM articles AFTER 'cursor-id' LIMIT 50
163
+
158
164
  -- Recommend
159
165
  RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
160
166
 
167
+ -- Select (retrieve a point by ID)
168
+ SELECT * FROM articles WHERE id = '3f2e1a4b-...'
169
+
161
170
  -- Collections
162
171
  CREATE COLLECTION articles
163
172
  CREATE COLLECTION articles HYBRID
@@ -188,7 +197,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
188
197
  pytest tests/ -v
189
198
  ```
190
199
 
191
- Expected: **375 tests passing**.
200
+ Expected: **405 tests passing**.
192
201
 
193
202
  ---
194
203
 
@@ -5,9 +5,9 @@
5
5
  [![PyPI version](https://img.shields.io/pypi/v/qql-cli?color=blue&label=PyPI)](https://pypi.org/project/qql-cli/)
6
6
  [![Python 3.12+](https://img.shields.io/pypi/pyversions/qql-cli)](https://pypi.org/project/qql-cli/)
7
7
  [![MIT License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
8
- [![Tests](https://img.shields.io/badge/tests-375%20passing-brightgreen)](tests/)
8
+ [![Tests](https://img.shields.io/badge/tests-405%20passing-brightgreen)](tests/)
9
9
 
10
- Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
10
+ Write `INSERT`, `SELECT`, `SEARCH`, `SCROLL`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
11
11
 
12
12
  ```
13
13
  qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
@@ -48,7 +48,7 @@ Your query string
48
48
  Qdrant instance
49
49
  ```
50
50
 
51
- When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) to merge the results of both retrieval methods.
51
+ When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) by default to merge the results of both retrieval methods. You can switch hybrid search to DBSF with `FUSION 'dbsf'`.
52
52
 
53
53
  ---
54
54
 
@@ -82,7 +82,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith
82
82
  |---|---|
83
83
  | [Getting Started](docs/getting-started.md) | Installation, connecting, first queries |
84
84
  | [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types |
85
- | [SEARCH / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, hybrid, reranking, recommendations |
85
+ | [SEARCH / SELECT / SCROLL / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, point retrieval, pagination, hybrid, reranking, recommendations |
86
86
  | [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
87
87
  | [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX |
88
88
  | [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
@@ -102,11 +102,20 @@ INSERT BULK INTO COLLECTION articles VALUES [{'text': '...'}, {'text': '...'}]
102
102
  SEARCH articles SIMILAR TO 'query' LIMIT 10
103
103
  SEARCH articles SIMILAR TO 'query' LIMIT 10 WHERE year >= 2020
104
104
  SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID
105
+ SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID FUSION 'dbsf'
105
106
  SEARCH articles SIMILAR TO 'query' LIMIT 10 USING HYBRID RERANK
106
107
 
108
+ -- Scroll
109
+ SCROLL FROM articles LIMIT 50
110
+ SCROLL FROM articles WHERE year >= 2024 LIMIT 50
111
+ SCROLL FROM articles AFTER 'cursor-id' LIMIT 50
112
+
107
113
  -- Recommend
108
114
  RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
109
115
 
116
+ -- Select (retrieve a point by ID)
117
+ SELECT * FROM articles WHERE id = '3f2e1a4b-...'
118
+
110
119
  -- Collections
111
120
  CREATE COLLECTION articles
112
121
  CREATE COLLECTION articles HYBRID
@@ -137,7 +146,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
137
146
  pytest tests/ -v
138
147
  ```
139
148
 
140
- Expected: **375 tests passing**.
149
+ Expected: **405 tests passing**.
141
150
 
142
151
  ---
143
152
 
@@ -24,7 +24,7 @@ Your query string
24
24
  Qdrant instance
25
25
  ```
26
26
 
27
- When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) to merge the results of both retrieval methods.
27
+ When you run `INSERT`, the `text` field is automatically converted into a dense vector using [Fastembed](https://github.com/qdrant/fastembed). In **hybrid mode** (`USING HYBRID`), a sparse BM25 vector is also generated alongside the dense vector, and searches use Qdrant's Reciprocal Rank Fusion (RRF) by default to merge the results of both retrieval methods. You can override that with `FUSION 'dbsf'` on hybrid searches.
28
28
 
29
29
  ---
30
30
 
@@ -138,8 +138,14 @@ SEARCH notes SIMILAR TO 'vector storage engines' LIMIT 3
138
138
  -- Filter results
139
139
  SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 WHERE year >= 2023
140
140
 
141
+ -- Browse with pagination
142
+ SCROLL FROM notes LIMIT 10
143
+
141
144
  -- List all collections
142
145
  SHOW COLLECTIONS
146
+
147
+ -- Retrieve a point by ID
148
+ SELECT * FROM notes WHERE id = 1
143
149
  ```
144
150
 
145
151
  ---
@@ -147,7 +153,7 @@ SHOW COLLECTIONS
147
153
  ## Next Steps
148
154
 
149
155
  - [INSERT / INSERT BULK](insert.md) — adding documents
150
- - [SEARCH / RECOMMEND / Hybrid / RERANK](search.md) — querying
156
+ - [SEARCH / SELECT / SCROLL / RECOMMEND / Hybrid / RERANK](search.md) — querying
151
157
  - [WHERE Filters](filters.md) — payload filtering
152
158
  - [Collections & Quantization](collections.md) — managing collections
153
159
  - [Scripts: EXECUTE / DUMP](scripts.md) — automating with script files
@@ -114,7 +114,7 @@
114
114
  <a href="https://pypi.org/project/qql-cli/"><img src="https://img.shields.io/pypi/v/qql-cli?color=blue&label=PyPI" alt="PyPI version" /></a>
115
115
  <a href="https://pypi.org/project/qql-cli/"><img src="https://img.shields.io/pypi/pyversions/qql-cli" alt="Python versions" /></a>
116
116
  <a href="https://github.com/pavanjava/qql/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="MIT License" /></a>
117
- <a href="https://github.com/pavanjava/qql/actions"><img src="https://img.shields.io/badge/tests-375%20passing-brightgreen" alt="375 tests" /></a>
117
+ <a href="https://github.com/pavanjava/qql/actions"><img src="https://img.shields.io/badge/tests-405%20passing-brightgreen" alt="405 tests" /></a>
118
118
  </div>
119
119
 
120
120
  <pre><span class="cmt"># Install</span>
@@ -148,8 +148,8 @@
148
148
  <p>Adding documents, batch inserts, payload types</p>
149
149
  </a>
150
150
  <a class="card" href="search">
151
- <h3>SEARCH / RECOMMEND</h3>
152
- <p>Semantic search, hybrid search, reranking, recommendations</p>
151
+ <h3>SEARCH / SELECT / SCROLL / RECOMMEND</h3>
152
+ <p>Semantic search, point retrieval, pagination, hybrid search, reranking, recommendations</p>
153
153
  </a>
154
154
  <a class="card" href="filters">
155
155
  <h3>WHERE Filters</h3>
@@ -40,6 +40,15 @@ result = run_query(
40
40
  for hit in result.data:
41
41
  print(hit["score"], hit["payload"])
42
42
 
43
+ # Scroll / pagination
44
+ result = run_query(
45
+ "SCROLL FROM notes LIMIT 2",
46
+ url="http://localhost:6333",
47
+ )
48
+ for point in result.data["points"]:
49
+ print(point["id"], point["payload"])
50
+ print(result.data["next_offset"])
51
+
43
52
  # Bulk insert (all records embedded and upserted in one call)
44
53
  result = run_query(
45
54
  """INSERT BULK INTO COLLECTION notes VALUES [
@@ -58,6 +67,13 @@ result = run_query(
58
67
  for hit in result.data:
59
68
  print(hit["score"], hit["payload"])
60
69
 
70
+ # Retrieve a point by ID
71
+ result = run_query(
72
+ "SELECT * FROM notes WHERE id = 1",
73
+ url="http://localhost:6333",
74
+ )
75
+ print(result.data) # {"id": "1", "payload": {...}}
76
+
61
77
  # Delete by filter
62
78
  result = run_query(
63
79
  "DELETE FROM notes WHERE year < 2023",
@@ -111,7 +127,9 @@ class ExecutionResult:
111
127
  | INSERT (dense) | `{"id": int \| "<uuid>", "collection": "<name>"}` |
112
128
  | INSERT (hybrid) | `{"id": int \| "<uuid>", "collection": "<name>"}` |
113
129
  | INSERT BULK | `None` (count in `result.message`) |
130
+ | SELECT | `{"id": str, "payload": dict}` or `None` when not found |
114
131
  | SEARCH | `[{"id": str, "score": float, "payload": dict}, ...]` |
132
+ | SCROLL | `{"points": [{"id": str, "payload": dict}, ...], "next_offset": str \| None}` |
115
133
  | RECOMMEND | `[{"id": str, "score": float, "payload": dict}, ...]` |
116
134
  | SHOW COLLECTIONS | `["name1", "name2", ...]` |
117
135
  | CREATE COLLECTION | `None` |
@@ -36,6 +36,9 @@ SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING MODEL 'BAAI/bge-small-en-v1.5'
36
36
  -- Hybrid with custom dense model
37
37
  SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
38
38
 
39
+ -- Hybrid with explicit fusion strategy
40
+ SEARCH docs SIMILAR TO 'hello' LIMIT 5 USING HYBRID FUSION 'dbsf'
41
+
39
42
  -- Hybrid with both custom
40
43
  SEARCH docs SIMILAR TO 'hello' LIMIT 5
41
44
  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' SPARSE MODEL 'prithivida/Splade_PP_en_v1'
@@ -159,7 +162,7 @@ Tests do not require a running Qdrant instance — the Qdrant client is mocked.
159
162
  pytest tests/ -v
160
163
  ```
161
164
 
162
- Expected output: **375 tests passing**.
165
+ Expected output: **405 tests passing**.
163
166
 
164
167
  ---
165
168
 
@@ -171,12 +174,14 @@ Expected output: **375 tests passing**.
171
174
  | `Connection failed: ...` | Qdrant unreachable at given URL | Check that Qdrant is running and the URL is correct |
172
175
  | `INSERT requires a 'text' field in VALUES` | `text` key missing from the VALUES dict | Add `'text': '...'` to your dict |
173
176
  | `Vector dimension mismatch: collection '...' expects X dims, but model produces Y dims` | Model used in INSERT differs from the one used to create the collection | Use `USING MODEL` to specify the same model as the collection was created with |
174
- | `Collection '...' does not exist` | SEARCH / DROP / DELETE on a non-existent collection | Check name spelling or run `SHOW COLLECTIONS` |
175
- | `Unexpected token '...'; expected a QQL statement keyword` | Unrecognized statement | Check the query syntax; QQL does not support SQL SELECT |
177
+ | `Collection '...' does not exist` | SEARCH / SCROLL / SELECT / DROP / DELETE on a non-existent collection | Check name spelling or run `SHOW COLLECTIONS` |
178
+ | `Unexpected token '...'; expected a QQL statement keyword` | Unrecognized statement | Check the query syntax and supported statement list |
179
+ | `SELECT requires a string or integer point id, got '...'` | `SELECT` used with a non-ID filter value | Use `SELECT * FROM <collection> WHERE id = '<id>'` or an integer ID |
176
180
  | `Unterminated string literal (at position N)` | A string is missing its closing quote | Close the string with a matching `'` or `"` |
177
181
  | `Unexpected character '@' (at position N)` | A character not part of QQL syntax | Remove or quote the offending character |
178
182
  | `Expected a filter operator after field '...'` | Unknown operator in WHERE clause | Use one of: `=`, `!=`, `>`, `>=`, `<`, `<=`, `IN`, `NOT IN`, `BETWEEN`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `MATCH` |
179
183
  | `Expected ')' ...` | Unclosed parenthesis in WHERE clause | Add the missing `)` to close the group |
180
184
  | `Qdrant error during SEARCH: ...` | Hybrid search on a non-hybrid collection, or wrong vector names | Ensure the collection was created with `HYBRID` before using `USING HYBRID` in INSERT/SEARCH |
185
+ | `Qdrant error during SCROLL: ...` | Qdrant rejected scroll request | Verify collection state, filter, and cursor (`AFTER`) value |
181
186
  | `Unknown index type '...'` | Invalid schema type in CREATE INDEX | Use one of: `keyword`, `integer`, `float`, `bool`, `text`, `geo`, `datetime` |
182
187
  | `Qdrant error during CREATE INDEX: ...` | Qdrant rejected the index creation | Check field name and collection state |
@@ -1,4 +1,4 @@
1
- # SEARCH, RECOMMEND, Hybrid Search & Reranking
1
+ # SEARCH, SELECT, SCROLL, RECOMMEND, Hybrid Search & Reranking
2
2
 
3
3
  ---
4
4
 
@@ -14,7 +14,7 @@ SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n>
14
14
  SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING MODEL '<model_name>'
15
15
  SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING MODEL '<model>'] WHERE <filter>
16
16
  SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID
17
- SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
17
+ SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING HYBRID [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>'] [WHERE <filter>]
18
18
  SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> USING SPARSE [MODEL '<sparse_model>']
19
19
  SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> EXACT
20
20
  SEARCH <collection_name> SIMILAR TO '<query_text>' LIMIT <n> [USING ...] [WHERE <filter>] [RERANK] WITH { hnsw_ef: <n>, exact: true|false, acorn: true|false }
@@ -33,7 +33,7 @@ Search only papers published after 2020:
33
33
  SEARCH articles SIMILAR TO 'deep learning' LIMIT 10 WHERE year > 2020
34
34
  ```
35
35
 
36
- Hybrid search (combines dense semantic + sparse BM25 keyword retrieval via RRF):
36
+ Hybrid search (combines dense semantic + sparse BM25 keyword retrieval via RRF by default):
37
37
  ```sql
38
38
  SEARCH articles SIMILAR TO 'attention mechanism' LIMIT 10 USING HYBRID
39
39
  ```
@@ -70,6 +70,28 @@ Results are displayed as a table with three columns:
70
70
 
71
71
  ---
72
72
 
73
+ ## SELECT — retrieve a point by ID
74
+
75
+ Fetches a single point payload by exact point ID.
76
+
77
+ **Syntax:**
78
+ ```sql
79
+ SELECT * FROM <collection_name> WHERE id = '<point_id>'
80
+ SELECT * FROM <collection_name> WHERE id = <integer_id>
81
+ ```
82
+
83
+ **Examples:**
84
+ ```sql
85
+ SELECT * FROM articles WHERE id = '3f2e1a4b-8c91-4d0e-b123-abc123def456'
86
+ SELECT * FROM articles WHERE id = 42
87
+ ```
88
+
89
+ `SELECT` in this version is intentionally strict:
90
+ - only `*` projection is supported
91
+ - only `WHERE id = ...` is supported
92
+
93
+ ---
94
+
73
95
  ## Query-Time Search Params (`EXACT`, `WITH`)
74
96
 
75
97
  Use these when you want to debug retrieval quality or tune recall without changing collection-level settings.
@@ -98,15 +120,41 @@ SEARCH articles SIMILAR TO 'RAG' LIMIT 10 WHERE tag = 'li' WITH { acorn: true }
98
120
 
99
121
  ---
100
122
 
123
+ ## SCROLL — pagination / browsing
124
+
125
+ Use `SCROLL` to iterate through points in a collection page by page.
126
+
127
+ **Syntax:**
128
+ ```sql
129
+ SCROLL FROM <collection_name> LIMIT <n>
130
+ SCROLL FROM <collection_name> WHERE <filter> LIMIT <n>
131
+ SCROLL FROM <collection_name> AFTER '<point_id>' LIMIT <n>
132
+ SCROLL FROM <collection_name> WHERE <filter> AFTER <point_id> LIMIT <n>
133
+ ```
134
+
135
+ **Examples:**
136
+ ```sql
137
+ SCROLL FROM articles LIMIT 50
138
+ SCROLL FROM articles WHERE year >= 2024 LIMIT 50
139
+ SCROLL FROM articles AFTER 'cursor-id' LIMIT 50
140
+ ```
141
+
142
+ **Behavior:**
143
+ - Returns points in ID order with payloads.
144
+ - Returns a `next_offset` cursor when more points are available.
145
+ - Use `AFTER <next_offset>` to fetch the next page.
146
+
147
+ ---
148
+
101
149
  ## Hybrid Search (USING HYBRID)
102
150
 
103
- Hybrid search combines **dense semantic vectors** and **sparse BM25 keyword vectors** in a single query and merges the results with Qdrant's **Reciprocal Rank Fusion (RRF)** algorithm. This typically outperforms either method alone.
151
+ Hybrid search combines **dense semantic vectors** and **sparse BM25 keyword vectors** in a single query. By default QQL merges the two result sets with Qdrant's **Reciprocal Rank Fusion (RRF)** algorithm, and you can optionally switch to **DBSF** with a `FUSION` clause.
104
152
 
105
153
  ### How it works internally
106
154
 
107
155
  1. Both a dense vector (`TextEmbedding`) and a sparse BM25 vector (`SparseTextEmbedding`) are generated from your query text.
108
156
  2. Qdrant fetches the top candidates from each index independently (`prefetch limit = LIMIT × 4`).
109
- 3. The two result lists are merged using RRF a rank-based fusion that does not require score normalization.
157
+ 3. The two result lists are merged using the selected fusion strategy (`RRF` by default, or `DBSF` when requested).
110
158
  4. The final top-N results are returned.
111
159
 
112
160
  ### Step 1: Create a hybrid collection
@@ -139,6 +187,9 @@ SEARCH articles SIMILAR TO 'transformer architecture' LIMIT 10 USING HYBRID
139
187
  -- Hybrid search with a WHERE filter
140
188
  SEARCH articles SIMILAR TO 'attention' LIMIT 10 USING HYBRID WHERE year >= 2017
141
189
 
190
+ -- Hybrid with DBSF fusion
191
+ SEARCH articles SIMILAR TO 'hybrid retrieval' LIMIT 10 USING HYBRID FUSION 'dbsf'
192
+
142
193
  -- Hybrid with custom dense model
143
194
  SEARCH articles SIMILAR TO 'embeddings' LIMIT 5
144
195
  USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5'
@@ -154,6 +205,7 @@ SEARCH articles SIMILAR TO 'sparse retrieval' LIMIT 5
154
205
  |---|---|
155
206
  | Dense model | configured default (`sentence-transformers/all-MiniLM-L6-v2`) |
156
207
  | Sparse model | `Qdrant/bm25` |
208
+ | Fusion | `rrf` |
157
209
 
158
210
  ### Dense vs. hybrid — when to use which
159
211
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "qql-cli"
3
- version = "2.1.0"
3
+ version = "2.2.0"
4
4
  description = "QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), WHERE clause filters, script execution, and collection dump/restore."
5
5
  readme = "README.md"
6
6
  license = { file = "LICENSE" }
@@ -180,6 +180,20 @@ class ShowCollectionsStmt:
180
180
  pass
181
181
 
182
182
 
183
+ @dataclass(frozen=True)
184
+ class SelectStmt:
185
+ collection: str
186
+ point_id: str | int
187
+
188
+
189
+ @dataclass(frozen=True)
190
+ class ScrollStmt:
191
+ collection: str
192
+ limit: int
193
+ query_filter: FilterExpr | None = None
194
+ after: str | int | None = None
195
+
196
+
183
197
  @dataclass(frozen=True)
184
198
  class SearchStmt:
185
199
  collection: str
@@ -187,6 +201,7 @@ class SearchStmt:
187
201
  limit: int
188
202
  model: str | None # dense model; None → use config default
189
203
  hybrid: bool = False # if True, use prefetch+RRF hybrid search
204
+ fusion: str | None = None # hybrid fusion strategy; None → default rrf
190
205
  sparse_only: bool = False # if True, query only the sparse vector (no dense)
191
206
  sparse_model: str | None = None # sparse model for hybrid/sparse-only; None → SparseEmbedder.DEFAULT_MODEL
192
207
  query_filter: FilterExpr | None = None # optional WHERE clause; default keeps existing tests valid
@@ -225,6 +240,8 @@ ASTNode = (
225
240
  | CreateIndexStmt
226
241
  | DropCollectionStmt
227
242
  | ShowCollectionsStmt
243
+ | SelectStmt
244
+ | ScrollStmt
228
245
  | SearchStmt
229
246
  | RecommendStmt
230
247
  | DeleteStmt
@@ -49,10 +49,18 @@ Available statements:
49
49
  [yellow]SHOW COLLECTIONS[/yellow]
50
50
  List all collections in the connected Qdrant instance.
51
51
 
52
+ [yellow]SCROLL FROM[/yellow] <name> [yellow]LIMIT[/yellow] <n>
53
+ Paginate points by ID order.
54
+ Optional: [yellow]WHERE[/yellow] <filter>
55
+ Optional: [yellow]AFTER[/yellow] '<id>'|<int>
56
+
57
+ [yellow]SELECT * FROM[/yellow] <name> [yellow]WHERE id =[/yellow] '<id>'|<int>
58
+ Retrieve a single point by its ID and return its payload.
59
+
52
60
  [yellow]SEARCH[/yellow] <name> [yellow]SIMILAR TO[/yellow] '<text>' [yellow]LIMIT[/yellow] <n>
53
61
  Semantic search by vector similarity.
54
62
  Optional: [yellow]USING MODEL[/yellow] '<model>'
55
- Optional: [yellow]USING HYBRID[/yellow] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>']
63
+ Optional: [yellow]USING HYBRID[/yellow] [FUSION 'rrf|dbsf'] [DENSE MODEL '<model>'] [SPARSE MODEL '<model>']
56
64
  Optional: [yellow]USING SPARSE[/yellow] [MODEL '<model>'] sparse-vector-only search
57
65
  Optional: [yellow]WHERE[/yellow] <filter> (e.g. WHERE year > 2020 AND status = 'ok')
58
66
  Optional: [yellow]RERANK[/yellow] [MODEL '<model>'] rerank results with a cross-encoder
@@ -400,5 +408,28 @@ def _run_and_print(executor: Executor, query: str) -> None:
400
408
  console.print(table)
401
409
  return
402
410
 
411
+ # Pretty-print scroll results
412
+ if isinstance(result.data, dict) and "points" in result.data and "next_offset" in result.data:
413
+ points = result.data["points"]
414
+ if points:
415
+ table = Table(show_header=True, header_style="bold cyan")
416
+ table.add_column("ID")
417
+ table.add_column("Payload")
418
+ for point in points:
419
+ table.add_row(point["id"], str(point["payload"]))
420
+ console.print(table)
421
+ if result.data["next_offset"] is not None:
422
+ console.print(f"[dim]next_offset: {result.data['next_offset']}[/dim]")
423
+ return
424
+
425
+ # Pretty-print SELECT result
426
+ if isinstance(result.data, dict) and "id" in result.data and "payload" in result.data:
427
+ table = Table(show_header=True, header_style="bold cyan")
428
+ table.add_column("ID")
429
+ table.add_column("Payload")
430
+ table.add_row(str(result.data["id"]), str(result.data["payload"]))
431
+ console.print(table)
432
+ return
433
+
403
434
  # Fallback: print data as-is
404
435
  console.print(result.data)
@@ -76,6 +76,8 @@ from .ast_nodes import (
76
76
  QuantizationConfig,
77
77
  QuantizationType,
78
78
  RecommendStmt,
79
+ SelectStmt,
80
+ ScrollStmt,
79
81
  SearchStmt,
80
82
  SearchWith,
81
83
  ShowCollectionsStmt,
@@ -115,6 +117,10 @@ class Executor:
115
117
  return self._execute_drop(node)
116
118
  if isinstance(node, ShowCollectionsStmt):
117
119
  return self._execute_show(node)
120
+ if isinstance(node, ScrollStmt):
121
+ return self._execute_scroll(node)
122
+ if isinstance(node, SelectStmt):
123
+ return self._execute_select(node)
118
124
  if isinstance(node, SearchStmt):
119
125
  return self._execute_search(node)
120
126
  if isinstance(node, RecommendStmt):
@@ -412,6 +418,65 @@ class Executor:
412
418
  data=names,
413
419
  )
414
420
 
421
+ def _execute_scroll(self, node: ScrollStmt) -> ExecutionResult:
422
+ if not self._client.collection_exists(node.collection):
423
+ raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
424
+
425
+ scroll_filter: Filter | None = None
426
+ if node.query_filter is not None:
427
+ scroll_filter = self._wrap_as_filter(
428
+ self._build_qdrant_filter(node.query_filter)
429
+ )
430
+
431
+ try:
432
+ records, next_offset = self._client.scroll(
433
+ collection_name=node.collection,
434
+ scroll_filter=scroll_filter,
435
+ limit=node.limit,
436
+ offset=node.after,
437
+ with_payload=True,
438
+ with_vectors=False,
439
+ )
440
+ except UnexpectedResponse as e:
441
+ raise QQLRuntimeError(f"Qdrant error during SCROLL: {e}") from e
442
+
443
+ points = [
444
+ {"id": str(rec.id), "payload": rec.payload or {}}
445
+ for rec in records
446
+ ]
447
+ return ExecutionResult(
448
+ success=True,
449
+ message=f"Scrolled {len(points)} point(s) from '{node.collection}'",
450
+ data={"points": points, "next_offset": None if next_offset is None else str(next_offset)},
451
+ )
452
+
453
+ def _execute_select(self, node: SelectStmt) -> ExecutionResult:
454
+ if not self._client.collection_exists(node.collection):
455
+ raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
456
+
457
+ try:
458
+ records = self._client.retrieve(
459
+ collection_name=node.collection,
460
+ ids=[node.point_id],
461
+ with_payload=True,
462
+ with_vectors=False,
463
+ )
464
+ except UnexpectedResponse as e:
465
+ raise QQLRuntimeError(f"Qdrant error during SELECT: {e}") from e
466
+
467
+ if not records:
468
+ return ExecutionResult(
469
+ success=True,
470
+ message=f"Point '{node.point_id}' not found in '{node.collection}'",
471
+ )
472
+
473
+ record = records[0]
474
+ return ExecutionResult(
475
+ success=True,
476
+ message=f"Retrieved point '{node.point_id}' from '{node.collection}'",
477
+ data={"id": str(record.id), "payload": record.payload or {}},
478
+ )
479
+
415
480
  def _execute_search(self, node: SearchStmt) -> ExecutionResult:
416
481
  if not self._client.collection_exists(node.collection):
417
482
  raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
@@ -429,7 +494,7 @@ class Executor:
429
494
  # enough material to reorder; only `node.limit` results are returned.
430
495
  fetch_limit = node.limit * _RERANK_FETCH_MULTIPLIER if node.rerank else node.limit
431
496
 
432
- # ── Hybrid SEARCH: prefetch dense+sparse, fuse with RRF ───────────
497
+ # ── Hybrid SEARCH: prefetch dense+sparse, fuse with the requested strategy ──
433
498
  if node.hybrid:
434
499
  dense_model = node.model or self._config.default_model
435
500
  sparse_model_name = node.sparse_model or SparseEmbedder.DEFAULT_MODEL
@@ -460,7 +525,7 @@ class Executor:
460
525
  params=search_params,
461
526
  ),
462
527
  ],
463
- query=FusionQuery(fusion=Fusion.RRF),
528
+ query=FusionQuery(fusion=self._resolve_hybrid_fusion(node.fusion)),
464
529
  limit=fetch_limit,
465
530
  query_filter=qdrant_filter,
466
531
  )
@@ -563,6 +628,15 @@ class Executor:
563
628
  data=results,
564
629
  )
565
630
 
631
+ def _resolve_hybrid_fusion(self, fusion: str | None) -> Fusion:
632
+ if fusion is None or fusion == "rrf":
633
+ return Fusion.RRF
634
+ if fusion == "dbsf":
635
+ return Fusion.DBSF
636
+ raise QQLRuntimeError(
637
+ f"Unsupported hybrid fusion '{fusion}'; expected 'rrf' or 'dbsf'"
638
+ )
639
+
566
640
  def _execute_recommend(self, node: RecommendStmt) -> ExecutionResult:
567
641
  if not self._client.collection_exists(node.collection):
568
642
  raise QQLRuntimeError(f"Collection '{node.collection}' does not exist")
@@ -14,6 +14,7 @@ class TokenKind(Enum):
14
14
  USING = auto()
15
15
  MODEL = auto()
16
16
  HYBRID = auto()
17
+ FUSION = auto()
17
18
  DENSE = auto()
18
19
  SPARSE = auto()
19
20
  RERANK = auto()
@@ -34,7 +35,9 @@ class TokenKind(Enum):
34
35
  ON = auto()
35
36
  DROP = auto()
36
37
  SHOW = auto()
38
+ SELECT = auto()
37
39
  COLLECTIONS = auto()
40
+ SCROLL = auto()
38
41
  SEARCH = auto()
39
42
  RECOMMEND = auto()
40
43
  POSITIVE = auto()
@@ -47,6 +50,7 @@ class TokenKind(Enum):
47
50
  OFFSET = auto()
48
51
  SCORE = auto()
49
52
  THRESHOLD = auto()
53
+ AFTER = auto()
50
54
  LOOKUP = auto()
51
55
  VECTOR = auto()
52
56
  DELETE = auto()
@@ -79,6 +83,7 @@ class TokenKind(Enum):
79
83
  RBRACKET = auto()
80
84
  LPAREN = auto()
81
85
  RPAREN = auto()
86
+ STAR = auto()
82
87
  COLON = auto()
83
88
  COMMA = auto()
84
89
  EQUALS = auto()
@@ -102,6 +107,7 @@ _KEYWORDS: dict[str, TokenKind] = {
102
107
  "USING": TokenKind.USING,
103
108
  "MODEL": TokenKind.MODEL,
104
109
  "HYBRID": TokenKind.HYBRID,
110
+ "FUSION": TokenKind.FUSION,
105
111
  "DENSE": TokenKind.DENSE,
106
112
  "SPARSE": TokenKind.SPARSE,
107
113
  "RERANK": TokenKind.RERANK,
@@ -122,7 +128,9 @@ _KEYWORDS: dict[str, TokenKind] = {
122
128
  "ON": TokenKind.ON,
123
129
  "DROP": TokenKind.DROP,
124
130
  "SHOW": TokenKind.SHOW,
131
+ "SELECT": TokenKind.SELECT,
125
132
  "COLLECTIONS": TokenKind.COLLECTIONS,
133
+ "SCROLL": TokenKind.SCROLL,
126
134
  "SEARCH": TokenKind.SEARCH,
127
135
  "RECOMMEND": TokenKind.RECOMMEND,
128
136
  "POSITIVE": TokenKind.POSITIVE,
@@ -135,6 +143,7 @@ _KEYWORDS: dict[str, TokenKind] = {
135
143
  "OFFSET": TokenKind.OFFSET,
136
144
  "SCORE": TokenKind.SCORE,
137
145
  "THRESHOLD": TokenKind.THRESHOLD,
146
+ "AFTER": TokenKind.AFTER,
138
147
  "LOOKUP": TokenKind.LOOKUP,
139
148
  "VECTOR": TokenKind.VECTOR,
140
149
  "DELETE": TokenKind.DELETE,
@@ -197,6 +206,9 @@ class Lexer:
197
206
  elif ch == ")":
198
207
  tokens.append(Token(TokenKind.RPAREN, ")", i))
199
208
  i += 1
209
+ elif ch == "*":
210
+ tokens.append(Token(TokenKind.STAR, "*", i))
211
+ i += 1
200
212
  elif ch == ":":
201
213
  tokens.append(Token(TokenKind.COLON, ":", i))
202
214
  i += 1
@@ -26,6 +26,8 @@ from .ast_nodes import (
26
26
  QuantizationConfig,
27
27
  QuantizationType,
28
28
  RecommendStmt,
29
+ SelectStmt,
30
+ ScrollStmt,
29
31
  SearchStmt,
30
32
  SearchWith,
31
33
  ShowCollectionsStmt,
@@ -43,6 +45,8 @@ _CMP_OPS: dict[TokenKind, str] = {
43
45
  TokenKind.LTE: "<=",
44
46
  }
45
47
 
48
+ _HYBRID_FUSION_VALUES = {"rrf", "dbsf"}
49
+
46
50
 
47
51
  class Parser:
48
52
  def __init__(self, tokens: list[Token]) -> None:
@@ -61,6 +65,10 @@ class Parser:
61
65
  node = self._parse_drop()
62
66
  elif tok.kind == TokenKind.SHOW:
63
67
  node = self._parse_show()
68
+ elif tok.kind == TokenKind.SCROLL:
69
+ node = self._parse_scroll()
70
+ elif tok.kind == TokenKind.SELECT:
71
+ node = self._parse_select()
64
72
  elif tok.kind == TokenKind.SEARCH:
65
73
  node = self._parse_search()
66
74
  elif tok.kind == TokenKind.RECOMMEND:
@@ -288,6 +296,43 @@ class Parser:
288
296
  self._expect(TokenKind.COLLECTIONS)
289
297
  return ShowCollectionsStmt()
290
298
 
299
def _parse_scroll(self) -> ScrollStmt:
    """Parse ``SCROLL FROM <collection> [WHERE <filter>] [AFTER <id>] LIMIT <n>``.

    WHERE and AFTER are both optional but are only recognised in that
    order; LIMIT with an integer is mandatory.
    """
    self._expect(TokenKind.SCROLL)
    self._expect(TokenKind.FROM)
    target = self._parse_identifier()

    where_clause: FilterExpr | None = None
    if self._peek().kind == TokenKind.WHERE:
        self._advance()  # consume WHERE
        where_clause = self._parse_filter_expr()

    cursor: str | int | None = None
    if self._peek().kind == TokenKind.AFTER:
        self._advance()  # consume AFTER
        cursor = self._parse_point_id_value("SCROLL AFTER")

    self._expect(TokenKind.LIMIT)
    limit_tok = self._expect(TokenKind.INTEGER)

    return ScrollStmt(
        collection=target,
        limit=int(limit_tok.value),
        query_filter=where_clause,
        after=cursor,
    )
324
+
325
def _parse_select(self) -> SelectStmt:
    """Parse ``SELECT * FROM <collection> WHERE id = <point id>``.

    Only this exact shape is accepted: the projection must be ``*`` and
    the WHERE clause must be an equality on ``id``.
    """
    for required in (TokenKind.SELECT, TokenKind.STAR, TokenKind.FROM):
        self._expect(required)
    target = self._parse_identifier()

    for required in (TokenKind.WHERE, TokenKind.ID, TokenKind.EQUALS):
        self._expect(required)
    wanted_id = self._parse_point_id_value("SELECT")

    return SelectStmt(collection=target, point_id=wanted_id)
335
+
291
336
  def _parse_search(self) -> SearchStmt:
292
337
  self._expect(TokenKind.SEARCH)
293
338
  collection = self._parse_identifier()
@@ -304,6 +349,7 @@ class Parser:
304
349
 
305
350
  model: str | None = None
306
351
  hybrid: bool = False
352
+ fusion: str | None = None
307
353
  sparse_only: bool = False
308
354
  sparse_model: str | None = None
309
355
  if self._peek().kind == TokenKind.USING:
@@ -311,9 +357,18 @@ class Parser:
311
357
  if self._peek().kind == TokenKind.HYBRID:
312
358
  self._advance() # consume HYBRID
313
359
  hybrid = True
314
- # Optional DENSE MODEL and/or SPARSE MODEL sub-clauses, any order
315
- while self._peek().kind in (TokenKind.DENSE, TokenKind.SPARSE):
360
+ # Optional FUSION / DENSE MODEL / SPARSE MODEL sub-clauses, any order.
361
+ while self._peek().kind in (TokenKind.FUSION, TokenKind.DENSE, TokenKind.SPARSE):
316
362
  sub = self._advance()
363
+ if sub.kind == TokenKind.FUSION:
364
+ value_tok = self._expect(TokenKind.STRING)
365
+ fusion = value_tok.value.lower()
366
+ if fusion not in _HYBRID_FUSION_VALUES:
367
+ raise QQLSyntaxError(
368
+ f"Unsupported hybrid fusion '{value_tok.value}'; expected 'rrf' or 'dbsf'",
369
+ value_tok.pos,
370
+ )
371
+ continue
317
372
  self._expect(TokenKind.MODEL)
318
373
  m = self._expect(TokenKind.STRING).value
319
374
  if sub.kind == TokenKind.DENSE:
@@ -368,6 +423,7 @@ class Parser:
368
423
  limit=limit,
369
424
  model=model,
370
425
  hybrid=hybrid,
426
+ fusion=fusion,
371
427
  sparse_only=sparse_only,
372
428
  sparse_model=sparse_model,
373
429
  query_filter=query_filter,
@@ -457,17 +513,7 @@ class Parser:
457
513
  if self._peek().kind == TokenKind.ID:
458
514
  self._advance()
459
515
  self._expect(TokenKind.EQUALS)
460
- tok = self._peek()
461
- if tok.kind == TokenKind.STRING:
462
- self._advance()
463
- point_id: str | int = tok.value
464
- elif tok.kind == TokenKind.INTEGER:
465
- self._advance()
466
- point_id = int(tok.value)
467
- else:
468
- raise QQLSyntaxError(
469
- f"Expected string or integer for point id, got '{tok.value}'", tok.pos
470
- )
516
+ point_id = self._parse_point_id_value("DELETE")
471
517
  return DeleteStmt(collection=collection, point_id=point_id)
472
518
 
473
519
  query_filter = self._parse_filter_expr()
@@ -694,6 +740,19 @@ class Parser:
694
740
  self._expect(TokenKind.RPAREN)
695
741
  return tuple(items)
696
742
 
743
def _parse_point_id_value(self, statement: str) -> str | int:
    """Consume and return a point id literal (string or integer token).

    ``statement`` only labels the error message. A STRING token yields its
    value unchanged; an INTEGER token is converted with ``int``. Any other
    token raises ``QQLSyntaxError`` without being consumed.
    """
    candidate = self._peek()
    kind = candidate.kind

    if kind not in (TokenKind.STRING, TokenKind.INTEGER):
        raise QQLSyntaxError(
            f"{statement} requires a string or integer point id, got '{candidate.value}'",
            candidate.pos,
        )

    self._advance()
    return candidate.value if kind == TokenKind.STRING else int(candidate.value)
755
+
697
756
  # ── Dict / value parsers (for INSERT VALUES) ──────────────────────────
698
757
 
699
758
  def _parse_identifier(self) -> str:
@@ -24,6 +24,8 @@ _STMT_STARTERS = {
24
24
  TokenKind.CREATE,
25
25
  TokenKind.DROP,
26
26
  TokenKind.SHOW,
27
+ TokenKind.SELECT,
28
+ TokenKind.SCROLL,
27
29
  TokenKind.SEARCH,
28
30
  TokenKind.RECOMMEND,
29
31
  TokenKind.DELETE,
@@ -54,7 +56,7 @@ def split_statements(tokens: list[Token]) -> list[list[Token]]:
54
56
  """Split a flat token list into per-statement chunks.
55
57
 
56
58
  A new chunk begins whenever a statement-starter keyword (INSERT, CREATE,
57
- DROP, SHOW, SEARCH, RECOMMEND, DELETE) is encountered at
59
+ DROP, SHOW, SCROLL, SELECT, SEARCH, RECOMMEND, DELETE) is encountered at
58
60
  brace/bracket/paren depth 0.
59
61
  The EOF sentinel is consumed and never included in any chunk.
60
62
  """
@@ -10,6 +10,8 @@ from qql.ast_nodes import (
10
10
  QuantizationConfig,
11
11
  QuantizationType,
12
12
  RecommendStmt,
13
+ SelectStmt,
14
+ ScrollStmt,
13
15
  SearchStmt,
14
16
  SearchWith,
15
17
  ShowCollectionsStmt,
@@ -357,6 +359,101 @@ class TestShow:
357
359
  assert "docs" in result.data
358
360
 
359
361
 
362
class TestScroll:
    """Executor behaviour for SCROLL statements."""

    def test_scroll_returns_points_and_next_offset(self, executor, mock_client, mocker):
        first = mocker.MagicMock()
        first.id, first.payload = "a", {"text": "first"}
        second = mocker.MagicMock()
        second.id, second.payload = 2, {"text": "second"}

        mock_client.collection_exists.return_value = True
        mock_client.scroll.return_value = ([first, second], "next-1")

        result = executor.execute(ScrollStmt(collection="notes", limit=2))

        mock_client.scroll.assert_called_once_with(
            collection_name="notes",
            scroll_filter=None,
            limit=2,
            offset=None,
            with_payload=True,
            with_vectors=False,
        )
        assert result.success is True
        # Ids come back stringified, including the integer one.
        expected_points = [
            {"id": "a", "payload": {"text": "first"}},
            {"id": "2", "payload": {"text": "second"}},
        ]
        assert result.data == {"points": expected_points, "next_offset": "next-1"}

    def test_scroll_with_after_and_filter(self, executor, mock_client, mocker):
        from qdrant_client.models import Filter
        from qql.ast_nodes import CompareExpr

        mock_client.collection_exists.return_value = True
        mock_client.scroll.return_value = ([], None)

        year_filter = CompareExpr(field="year", op=">=", value=2024)
        stmt = ScrollStmt(
            collection="notes",
            limit=10,
            after="cursor-id",
            query_filter=year_filter,
        )
        executor.execute(stmt)

        call_kwargs = mock_client.scroll.call_args.kwargs
        assert call_kwargs["offset"] == "cursor-id"
        assert isinstance(call_kwargs["scroll_filter"], Filter)

    def test_scroll_nonexistent_collection_raises(self, executor, mock_client):
        mock_client.collection_exists.return_value = False
        stmt = ScrollStmt(collection="ghost", limit=5)
        with pytest.raises(QQLRuntimeError, match="does not exist"):
            executor.execute(stmt)
417
+
418
+
419
class TestSelect:
    """Executor behaviour for SELECT-by-id statements."""

    def test_select_by_id_returns_payload(self, executor, mock_client, mocker):
        stored = mocker.MagicMock()
        stored.id = "abc-123"
        stored.payload = {"text": "hello", "year": 2024}

        mock_client.collection_exists.return_value = True
        mock_client.retrieve.return_value = [stored]

        result = executor.execute(SelectStmt(collection="notes", point_id="abc-123"))

        mock_client.retrieve.assert_called_once_with(
            collection_name="notes",
            ids=["abc-123"],
            with_payload=True,
            with_vectors=False,
        )
        assert result.success is True
        expected = {"id": "abc-123", "payload": {"text": "hello", "year": 2024}}
        assert result.data == expected

    def test_select_not_found(self, executor, mock_client):
        mock_client.collection_exists.return_value = True
        mock_client.retrieve.return_value = []

        result = executor.execute(SelectStmt(collection="notes", point_id=7))

        # A missing point is reported as a successful, data-less result.
        assert result.success is True
        assert "not found" in result.message
        assert result.data is None

    def test_select_nonexistent_collection_raises(self, executor, mock_client):
        mock_client.collection_exists.return_value = False
        stmt = SelectStmt(collection="ghost", point_id="x")
        with pytest.raises(QQLRuntimeError, match="does not exist"):
            executor.execute(stmt)
455
+
456
+
360
457
  class TestSearch:
361
458
  def test_search_calls_qdrant_query_points(self, executor, mock_client, mocker):
362
459
  mock_client.collection_exists.return_value = True
@@ -1063,6 +1160,29 @@ class TestHybridSearch:
1063
1160
  assert isinstance(kw["query"], FusionQuery)
1064
1161
  assert kw["query"].fusion == Fusion.RRF
1065
1162
 
1163
def test_hybrid_search_uses_dbsf_fusion(
    self, executor, mock_client, mock_sparse_embedder, mocker
):
    """A SearchStmt with fusion="dbsf" must reach Qdrant as Fusion.DBSF."""
    from qdrant_client.models import Fusion, FusionQuery

    empty_response = mocker.MagicMock()
    empty_response.points = []
    mock_client.collection_exists.return_value = True
    mock_client.query_points.return_value = empty_response

    stmt = SearchStmt(
        collection="col",
        query_text="q",
        limit=5,
        model=None,
        hybrid=True,
        fusion="dbsf",
    )
    executor.execute(stmt)

    fusion_query = mock_client.query_points.call_args.kwargs["query"]
    assert isinstance(fusion_query, FusionQuery)
    assert fusion_query.fusion == Fusion.DBSF
1185
+
1066
1186
  def test_hybrid_search_prefetch_limit_is_4x(
1067
1187
  self, executor, mock_client, mock_sparse_embedder, mocker
1068
1188
  ):
@@ -39,6 +39,20 @@ class TestKeywords:
39
39
  assert ks[3] == TokenKind.TO
40
40
  assert ks[5] == TokenKind.LIMIT
41
41
 
42
def test_scroll_keywords(self):
    """SCROLL, FROM, AFTER and LIMIT are all lexed as keywords."""
    token_kinds = kinds("SCROLL FROM docs AFTER 'cursor-id' LIMIT 50")
    assert token_kinds[0] == TokenKind.SCROLL
    assert token_kinds[1] == TokenKind.FROM
    for expected in (TokenKind.AFTER, TokenKind.LIMIT):
        assert expected in token_kinds
48
+
49
def test_select_keywords(self):
    """SELECT, *, FROM and WHERE are lexed at their expected positions."""
    token_kinds = kinds("SELECT * FROM notes WHERE id = 'abc'")
    expected_at = {
        0: TokenKind.SELECT,
        1: TokenKind.STAR,
        2: TokenKind.FROM,
        4: TokenKind.WHERE,  # index 3 is the collection name
    }
    for position, expected in expected_at.items():
        assert token_kinds[position] == expected
55
+
42
56
  def test_delete_keywords(self):
43
57
  ks = kinds("DELETE FROM foo WHERE id = 'abc'")
44
58
  assert ks[:4] == [TokenKind.DELETE, TokenKind.FROM, TokenKind.IDENTIFIER, TokenKind.WHERE]
@@ -89,6 +103,10 @@ class TestPunctuation:
89
103
  assert ks[0] == TokenKind.LBRACKET
90
104
  assert ks[-2] == TokenKind.RBRACKET
91
105
 
106
def test_star(self):
    """A lone ``*`` is lexed as a STAR token."""
    first_kind = kinds("*")[0]
    assert first_kind == TokenKind.STAR
109
+
92
110
 
93
111
  class TestErrors:
94
112
  def test_unterminated_string(self):
@@ -212,6 +230,10 @@ class TestHybridKeyword:
212
230
  ks = kinds("sparse")
213
231
  assert ks[0] == TokenKind.SPARSE
214
232
 
233
def test_fusion_keyword(self):
    """The word FUSION is lexed as its own keyword token."""
    first_kind = kinds("FUSION")[0]
    assert first_kind == TokenKind.FUSION
236
+
215
237
  def test_hybrid_in_create_statement(self):
216
238
  ks = kinds("CREATE COLLECTION articles HYBRID")
217
239
  assert ks[3] == TokenKind.HYBRID
@@ -24,6 +24,8 @@ from qql.ast_nodes import (
24
24
  QuantizationConfig,
25
25
  QuantizationType,
26
26
  RecommendStmt,
27
+ SelectStmt,
28
+ ScrollStmt,
27
29
  SearchStmt,
28
30
  SearchWith,
29
31
  ShowCollectionsStmt,
@@ -189,6 +191,51 @@ class TestShow:
189
191
  assert isinstance(node, ShowCollectionsStmt)
190
192
 
191
193
 
194
class TestScroll:
    """Parser behaviour for SCROLL statements."""

    def test_scroll_basic(self):
        stmt = parse("SCROLL FROM docs LIMIT 50")
        assert isinstance(stmt, ScrollStmt)
        assert stmt.collection == "docs"
        assert stmt.limit == 50
        # Neither optional clause was supplied.
        assert stmt.query_filter is None
        assert stmt.after is None

    def test_scroll_with_where(self):
        stmt = parse("SCROLL FROM docs WHERE year >= 2024 LIMIT 50")
        assert isinstance(stmt, ScrollStmt)
        where = stmt.query_filter
        assert isinstance(where, CompareExpr)
        assert where.field == "year"
        assert stmt.after is None

    def test_scroll_with_after(self):
        stmt = parse("SCROLL FROM docs AFTER 'cursor-id' LIMIT 50")
        assert isinstance(stmt, ScrollStmt)
        assert stmt.after == "cursor-id"

    def test_scroll_with_where_and_after(self):
        stmt = parse("SCROLL FROM docs WHERE year >= 2024 AFTER 42 LIMIT 50")
        assert isinstance(stmt, ScrollStmt)
        assert stmt.after == 42
        assert isinstance(stmt.query_filter, CompareExpr)
220
+
221
+
222
class TestSelect:
    """Parser behaviour for SELECT-by-id statements."""

    def test_select_by_string_id(self):
        stmt = parse("SELECT * FROM notes WHERE id = 'abc-123'")
        assert isinstance(stmt, SelectStmt)
        assert stmt.collection == "notes"
        assert stmt.point_id == "abc-123"

    def test_select_by_integer_id(self):
        stmt = parse("SELECT * FROM notes WHERE id = 42")
        assert isinstance(stmt, SelectStmt)
        assert stmt.point_id == 42

    def test_select_requires_id_filter(self):
        # Filtering on any field other than id is a syntax error for SELECT.
        query = "SELECT * FROM notes WHERE year = 2024"
        with pytest.raises(QQLSyntaxError):
            parse(query)
237
+
238
+
192
239
  class TestSearch:
193
240
  def test_basic_search(self):
194
241
  node = parse("SEARCH notes SIMILAR TO 'hello world' LIMIT 5")
@@ -334,7 +381,7 @@ class TestRecommend:
334
381
  class TestErrors:
335
382
  def test_unknown_keyword(self):
336
383
  with pytest.raises(QQLSyntaxError):
337
- parse("SELECT * FROM foo")
384
+ parse("UPSERT INTO foo VALUES {'text': 'x'}")
338
385
 
339
386
  def test_missing_collection_name(self):
340
387
  with pytest.raises(QQLSyntaxError):
@@ -704,6 +751,24 @@ class TestHybridSearch:
704
751
  assert isinstance(node.query_filter, CompareExpr)
705
752
  assert node.query_filter.field == "year"
706
753
 
754
def test_search_hybrid_with_dbsf_fusion(self):
    """``USING HYBRID FUSION 'dbsf'`` sets both hybrid and fusion on the node."""
    query = "SEARCH docs SIMILAR TO 'q' LIMIT 10 USING HYBRID FUSION 'dbsf'"
    stmt = parse(query)
    assert stmt.hybrid is True
    assert stmt.fusion == "dbsf"
760
+
761
def test_search_hybrid_with_fusion_and_models(self):
    """FUSION may be combined with SPARSE MODEL and DENSE MODEL sub-clauses."""
    query = " ".join(
        [
            "SEARCH docs SIMILAR TO 'q' LIMIT 10",
            "USING HYBRID FUSION 'rrf'",
            "SPARSE MODEL 'Qdrant/bm25'",
            "DENSE MODEL 'BAAI/bge-base-en-v1.5'",
        ]
    )
    stmt = parse(query)
    assert stmt.hybrid is True
    assert stmt.fusion == "rrf"
    assert stmt.sparse_model == "Qdrant/bm25"
    assert stmt.model == "BAAI/bge-base-en-v1.5"
771
+
707
772
  def test_search_hybrid_dense_model_and_where(self):
708
773
  node = parse(
709
774
  "SEARCH articles SIMILAR TO 'ml' LIMIT 10 "
@@ -713,6 +778,10 @@ class TestHybridSearch:
713
778
  assert node.model == "BAAI/bge-small-en-v1.5"
714
779
  assert isinstance(node.query_filter, CompareExpr)
715
780
 
781
def test_search_hybrid_rejects_unknown_fusion(self):
    """An unrecognised fusion name is rejected at parse time."""
    query = "SEARCH docs SIMILAR TO 'q' LIMIT 10 USING HYBRID FUSION 'x'"
    with pytest.raises(QQLSyntaxError, match="Unsupported hybrid fusion"):
        parse(query)
784
+
716
785
  def test_search_hybrid_limit_preserved(self):
717
786
  node = parse("SEARCH col SIMILAR TO 'q' LIMIT 7 USING HYBRID")
718
787
  assert node.limit == 7
@@ -111,6 +111,30 @@ class TestSplitStatements:
111
111
  assert len(chunks) == 3
112
112
  assert chunks[1][0].kind == TokenKind.RECOMMEND
113
113
 
114
def test_scroll_starts_new_top_level_statement(self):
    """SCROLL between two other statements yields its own chunk."""
    from qql.lexer import TokenKind

    script = "\n".join(
        [
            "SHOW COLLECTIONS",
            "SCROLL FROM x LIMIT 10",
            "DROP COLLECTION x",
        ]
    )
    chunks = split_statements(tokenize(script))
    assert len(chunks) == 3
    middle_chunk = chunks[1]
    assert middle_chunk[0].kind == TokenKind.SCROLL
125
+
126
def test_select_starts_new_top_level_statement(self):
    """SELECT between two other statements yields its own chunk."""
    from qql.lexer import TokenKind

    script = "\n".join(
        [
            "SHOW COLLECTIONS",
            "SELECT * FROM x WHERE id = 'id-1'",
            "DROP COLLECTION x",
        ]
    )
    chunks = split_statements(tokenize(script))
    assert len(chunks) == 3
    middle_chunk = chunks[1]
    assert middle_chunk[0].kind == TokenKind.SELECT
137
+
114
138
 
115
139
  # ── run_script ────────────────────────────────────────────────────────────────
116
140
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes