qql-cli 2.0.0__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {qql_cli-2.0.0 → qql_cli-2.1.0}/PKG-INFO +8 -5
  2. {qql_cli-2.0.0 → qql_cli-2.1.0}/README.md +5 -2
  3. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/collections.md +52 -14
  4. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/scripts.md +3 -0
  5. {qql_cli-2.0.0 → qql_cli-2.1.0}/pyproject.toml +3 -3
  6. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/ast_nodes.py +4 -2
  7. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/cli.py +11 -2
  8. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/dumper.py +11 -6
  9. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/executor.py +29 -3
  10. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/lexer.py +5 -1
  11. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/parser.py +25 -1
  12. {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_dumper.py +39 -5
  13. {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_executor.py +107 -0
  14. {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_parser.py +80 -0
  15. {qql_cli-2.0.0 → qql_cli-2.1.0}/.github/workflows/ci.yml +0 -0
  16. {qql_cli-2.0.0 → qql_cli-2.1.0}/.github/workflows/publish.yml +0 -0
  17. {qql_cli-2.0.0 → qql_cli-2.1.0}/.gitignore +0 -0
  18. {qql_cli-2.0.0 → qql_cli-2.1.0}/LICENSE +0 -0
  19. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/_config.yml +0 -0
  20. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/filters.md +0 -0
  21. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/getting-started.md +0 -0
  22. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/index.html +0 -0
  23. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/insert.md +0 -0
  24. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/programmatic.md +0 -0
  25. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/reference.md +0 -0
  26. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/robots.txt +0 -0
  27. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/search.md +0 -0
  28. {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/sitemap.xml +0 -0
  29. {qql_cli-2.0.0 → qql_cli-2.1.0}/main.py +0 -0
  30. {qql_cli-2.0.0 → qql_cli-2.1.0}/resources/Features.md +0 -0
  31. {qql_cli-2.0.0 → qql_cli-2.1.0}/resources/sample.qql +0 -0
  32. {qql_cli-2.0.0 → qql_cli-2.1.0}/resources/sample_v2.qql +0 -0
  33. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/__init__.py +0 -0
  34. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/config.py +0 -0
  35. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/embedder.py +0 -0
  36. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/exceptions.py +0 -0
  37. {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/script.py +0 -0
  38. {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/__init__.py +0 -0
  39. {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_lexer.py +0 -0
  40. {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_script.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: qql-cli
3
- Version: 2.0.0
4
- Summary: QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), WHERE clause filters, script execution, and collection dump/restore.
3
+ Version: 2.1.0
4
+ Summary: QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), WHERE clause filters, script execution, and collection dump/restore.
5
5
  Project-URL: Homepage, https://github.com/pavanjava/qql
6
6
  Project-URL: Repository, https://github.com/pavanjava/qql
7
7
  Project-URL: Documentation, https://pavanjava.github.io/qql
@@ -45,7 +45,7 @@ Classifier: Topic :: Utilities
45
45
  Requires-Python: >=3.12
46
46
  Requires-Dist: click>=8.1.0
47
47
  Requires-Dist: prompt-toolkit>=3.0.0
48
- Requires-Dist: qdrant-client[fastembed]>=1.13.0
48
+ Requires-Dist: qdrant-client[fastembed]>=1.18.0
49
49
  Requires-Dist: rich>=13.0.0
50
50
  Description-Content-Type: text/markdown
51
51
 
@@ -58,7 +58,7 @@ Description-Content-Type: text/markdown
58
58
  [![MIT License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
59
59
  [![Tests](https://img.shields.io/badge/tests-375%20passing-brightgreen)](tests/)
60
60
 
61
- Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
61
+ Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
62
62
 
63
63
  ```
64
64
  qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
@@ -135,7 +135,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith
135
135
  | [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types |
136
136
  | [SEARCH / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, hybrid, reranking, recommendations |
137
137
  | [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
138
- | [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/binary/product), CREATE INDEX |
138
+ | [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX |
139
139
  | [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
140
140
  | [Programmatic Usage](docs/programmatic.md) | Use QQL as a Python library |
141
141
  | [Reference: Models / Config / Errors](docs/reference.md) | Embedding models, config file, error reference |
@@ -162,6 +162,9 @@ RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
162
162
  CREATE COLLECTION articles
163
163
  CREATE COLLECTION articles HYBRID
164
164
  CREATE COLLECTION articles QUANTIZE SCALAR
165
+ CREATE COLLECTION articles QUANTIZE TURBO
166
+ CREATE COLLECTION articles QUANTIZE TURBO BITS 2
167
+ CREATE COLLECTION articles QUANTIZE TURBO BITS 1.5 ALWAYS RAM
165
168
  CREATE INDEX ON COLLECTION articles FOR year TYPE integer
166
169
  SHOW COLLECTIONS
167
170
  DROP COLLECTION articles
@@ -7,7 +7,7 @@
7
7
  [![MIT License](https://img.shields.io/badge/license-MIT-green)](LICENSE)
8
8
  [![Tests](https://img.shields.io/badge/tests-375%20passing-brightgreen)](tests/)
9
9
 
10
- Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
10
+ Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
11
11
 
12
12
  ```
13
13
  qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
@@ -84,7 +84,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith
84
84
  | [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types |
85
85
  | [SEARCH / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, hybrid, reranking, recommendations |
86
86
  | [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
87
- | [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/binary/product), CREATE INDEX |
87
+ | [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX |
88
88
  | [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
89
89
  | [Programmatic Usage](docs/programmatic.md) | Use QQL as a Python library |
90
90
  | [Reference: Models / Config / Errors](docs/reference.md) | Embedding models, config file, error reference |
@@ -111,6 +111,9 @@ RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
111
111
  CREATE COLLECTION articles
112
112
  CREATE COLLECTION articles HYBRID
113
113
  CREATE COLLECTION articles QUANTIZE SCALAR
114
+ CREATE COLLECTION articles QUANTIZE TURBO
115
+ CREATE COLLECTION articles QUANTIZE TURBO BITS 2
116
+ CREATE COLLECTION articles QUANTIZE TURBO BITS 1.5 ALWAYS RAM
114
117
  CREATE INDEX ON COLLECTION articles FOR year TYPE integer
115
118
  SHOW COLLECTIONS
116
119
  DROP COLLECTION articles
@@ -67,27 +67,38 @@ When `USING MODEL` is omitted, the collection uses the **default embedding model
67
67
 
68
68
  ## Quantization — QUANTIZE clause
69
69
 
70
- Quantization reduces the memory footprint of vector collections and speeds up search at the cost of a small, controllable accuracy loss. QQL supports all three Qdrant quantization strategies via an optional `QUANTIZE` clause appended to `CREATE COLLECTION`.
70
+ Quantization reduces the memory footprint of vector collections and speeds up search at the cost of a small, controllable accuracy loss. QQL supports all four Qdrant quantization strategies via an optional `QUANTIZE` clause appended to `CREATE COLLECTION`.
71
71
 
72
- **Three strategies:**
72
+ **Four strategies:**
73
73
 
74
- | Type | Compression | Accuracy Loss | Best For |
74
+ | Type | Compression | Accuracy | Best For |
75
75
  |---|---|---|---|
76
- | `SCALAR` | 4× (float32 → int8) | < 1% | Most collections — best balance |
77
- | `BINARY` | 32× (float32 → 1-bit) | Higher | High-dimensional vectors (768+), speed priority |
76
+ | `SCALAR` | 4× (float32 → int8) | < 1% loss | Most collections — best balance |
77
+ | `TURBO` | 8–32× (4-bit to 1-bit) | Low–medium | Better recall than BINARY at same storage budget |
78
+ | `BINARY` | 32× (float32 → 1-bit) | Higher loss | Speed priority; centered distributions only |
78
79
  | `PRODUCT` | 4× (configurable) | Variable | Memory-constrained deployments |
79
80
 
80
81
  **Full syntax:**
81
82
  ```
82
83
  CREATE COLLECTION <name> ... QUANTIZE SCALAR [QUANTILE <0.0–1.0>] [ALWAYS RAM]
84
+ CREATE COLLECTION <name> ... QUANTIZE TURBO [BITS <1|1.5|2|4>] [ALWAYS RAM]
83
85
  CREATE COLLECTION <name> ... QUANTIZE BINARY [ALWAYS RAM]
84
86
  CREATE COLLECTION <name> ... QUANTIZE PRODUCT [ALWAYS RAM]
85
87
  ```
86
88
 
87
- - **`QUANTILE <float>`** — (scalar only) calibration quantile for the INT8 conversion; defaults to Qdrant's built-in default (0.99) when omitted.
88
- - **`ALWAYS RAM`** — keep the **quantized** vectors in RAM at all times, regardless of the collection's `on_disk` setting. Improves search throughput at the cost of higher RAM usage for the compressed index. The original full-precision vectors are stored and managed independently of this flag. Supported by all three quantization types.
89
+ - **`QUANTILE <float>`** — (SCALAR only) calibration quantile for the INT8 conversion; defaults to Qdrant's built-in default (0.99) when omitted.
90
+ - **`BITS <depth>`** — (TURBO only) bit depth passed to the Qdrant SDK:
91
+ - `4` — 4-bit (Qdrant's default; when `BITS` is omitted, QQL sends no bit depth and the server applies its own default)
92
+ - `2` — 2-bit
93
+ - `1.5` — 1.5-bit
94
+ - `1` — 1-bit
95
+ > Compression ratios (8×, 16×, 24×, 32×) and recall characteristics are
96
+ > Qdrant server-side behaviors. QQL maps the `BITS` value to the SDK model and
97
+ > passes it to Qdrant; actual results depend on your Qdrant server version.
98
+ - **`ALWAYS RAM`** — keep the **quantized** vectors in RAM at all times, regardless of the collection's `on_disk` setting. Improves search throughput at the cost of higher RAM usage for the compressed index. The original full-precision vectors are stored and managed independently of this flag. Supported by all four quantization types.
89
99
  - **`QUANTIZE`** always appears **after** all other clauses (`HYBRID`, `USING MODEL`, etc.).
90
100
  - For `PRODUCT`, the compression ratio is fixed at **4×** in this version.
101
+ - For `TURBO`, Cosine, Dot, and Euclidean distance are supported by the Qdrant server when TurboQuant is enabled.
91
102
  - When used with `HYBRID` collections, quantization applies only to the **dense** vector.
92
103
 
93
104
  **Examples:**
@@ -102,6 +113,26 @@ Scalar with explicit calibration and quantized vectors pinned to RAM:
102
113
  CREATE COLLECTION research_papers QUANTIZE SCALAR QUANTILE 0.95 ALWAYS RAM
103
114
  ```
104
115
 
116
+ TurboQuant — default 4-bit (8× compression, good recall):
117
+ ```sql
118
+ CREATE COLLECTION research_papers QUANTIZE TURBO
119
+ ```
120
+
121
+ TurboQuant — 2-bit (16× compression):
122
+ ```sql
123
+ CREATE COLLECTION research_papers QUANTIZE TURBO BITS 2
124
+ ```
125
+
126
+ TurboQuant — 1.5-bit (24× compression) with quantized vectors pinned to RAM:
127
+ ```sql
128
+ CREATE COLLECTION research_papers QUANTIZE TURBO BITS 1.5 ALWAYS RAM
129
+ ```
130
+
131
+ TurboQuant — 1-bit (32× compression, same ratio as BINARY but better recall):
132
+ ```sql
133
+ CREATE COLLECTION research_papers QUANTIZE TURBO BITS 1
134
+ ```
135
+
105
136
  Binary quantization for large high-dimensional embeddings:
106
137
  ```sql
107
138
  CREATE COLLECTION research_papers QUANTIZE BINARY
@@ -115,22 +146,29 @@ CREATE COLLECTION research_papers QUANTIZE PRODUCT ALWAYS RAM
115
146
  Combined with hybrid collection:
116
147
  ```sql
117
148
  CREATE COLLECTION research_papers HYBRID QUANTIZE SCALAR
149
+ CREATE COLLECTION research_papers HYBRID QUANTIZE TURBO BITS 2
118
150
  ```
119
151
 
120
152
  Combined with a pinned model:
121
153
  ```sql
122
154
  CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE SCALAR QUANTILE 0.99
155
+ CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE TURBO BITS 2
156
+ ```
157
+
158
+ Combined with hybrid + dense model:
159
+ ```sql
160
+ CREATE COLLECTION research_papers USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE TURBO
123
161
  ```
124
162
 
125
163
  **Valid combinations:**
126
164
 
127
- | Base form | + QUANTIZE SCALAR | + QUANTIZE BINARY | + QUANTIZE PRODUCT |
128
- |---|---|---|---|
129
- | `CREATE COLLECTION name` | ✓ | ✓ | ✓ |
130
- | `... HYBRID` | ✓ | ✓ | ✓ |
131
- | `... USING MODEL 'x'` | ✓ | ✓ | ✓ |
132
- | `... USING HYBRID` | ✓ | ✓ | ✓ |
133
- | `... USING HYBRID DENSE MODEL 'x'` | ✓ | ✓ | ✓ |
165
+ | Base form | + SCALAR | + TURBO | + BINARY | + PRODUCT |
166
+ |---|---|---|---|---|
167
+ | `CREATE COLLECTION name` | ✓ | ✓ | ✓ | ✓ |
168
+ | `... HYBRID` | ✓ | ✓ | ✓ | ✓ |
169
+ | `... USING MODEL 'x'` | ✓ | ✓ | ✓ | ✓ |
170
+ | `... USING HYBRID` | ✓ | ✓ | ✓ | ✓ |
171
+ | `... USING HYBRID DENSE MODEL 'x'` | ✓ | ✓ | ✓ | ✓ |
134
172
 
135
173
  > INSERT and SEARCH on quantized collections work exactly the same as on non-quantized ones — no changes to INSERT or SEARCH syntax are needed.
136
174
 
@@ -79,6 +79,9 @@ Export every point in a collection to a `.qql` script file. The generated file i
79
79
  **CLI usage:**
80
80
  ```bash
81
81
  qql dump <collection_name> <output.qql>
82
+
83
+ # Override the default 50 points/INSERT BULK batch
84
+ qql dump <collection_name> <output.qql> --batch-size 200
82
85
  ```
83
86
 
84
87
  **In-shell usage (inside the QQL REPL):**
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "qql-cli"
3
- version = "2.0.0"
4
- description = "QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), WHERE clause filters, script execution, and collection dump/restore."
3
+ version = "2.1.0"
4
+ description = "QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), WHERE clause filters, script execution, and collection dump/restore."
5
5
  readme = "README.md"
6
6
  license = { file = "LICENSE" }
7
7
  requires-python = ">=3.12"
@@ -37,7 +37,7 @@ classifiers = [
37
37
  "Topic :: Text Processing :: Indexing",
38
38
  ]
39
39
  dependencies = [
40
- "qdrant-client[fastembed]>=1.13.0",
40
+ "qdrant-client[fastembed]>=1.18.0",
41
41
  "click>=8.1.0",
42
42
  "rich>=13.0.0",
43
43
  "prompt_toolkit>=3.0.0",
@@ -9,14 +9,16 @@ class QuantizationType(Enum):
9
9
  SCALAR = "scalar"
10
10
  BINARY = "binary"
11
11
  PRODUCT = "product"
12
+ TURBO = "turbo"
12
13
 
13
14
 
14
15
  @dataclass(frozen=True)
15
16
  class QuantizationConfig:
16
17
  """Quantization settings parsed from a QUANTIZE clause."""
17
18
  type: QuantizationType
18
- quantile: float | None = None # SCALAR only; None → Qdrant default (0.99)
19
- always_ram: bool = False # all types; default False
19
+ quantile: float | None = None # SCALAR only; None → Qdrant default (0.99)
20
+ always_ram: bool = False # all types; default False
21
+ turbo_bits: float | None = None # TURBO only; None → bits4 (Qdrant default 4-bit, 8×)
20
22
 
21
23
 
22
24
  @dataclass(frozen=True)
@@ -201,7 +201,14 @@ def execute(file: str, stop_on_error: bool) -> None:
201
201
  @main.command()
202
202
  @click.argument("collection")
203
203
  @click.argument("output", type=click.Path())
204
- def dump(collection: str, output: str) -> None:
204
+ @click.option(
205
+ "--batch-size",
206
+ type=click.IntRange(min=1),
207
+ default=50,
208
+ show_default=True,
209
+ help="Points per INSERT BULK batch in the generated script.",
210
+ )
211
+ def dump(collection: str, output: str, batch_size: int) -> None:
205
212
  """Dump a collection to a .qql script file.
206
213
 
207
214
  OUTPUT is the path for the generated .qql file.
@@ -230,7 +237,9 @@ def dump(collection: str, output: str) -> None:
230
237
  console.print(
231
238
  f"[bold cyan]Dumping:[/bold cyan] '{collection}' -> {output}\n"
232
239
  )
233
- written, skipped = dump_collection(collection, output, client, console, err_console)
240
+ written, skipped = dump_collection(
241
+ collection, output, client, console, err_console, batch_size=batch_size
242
+ )
234
243
 
235
244
  if written == 0 and skipped == 0:
236
245
  # collection not found — error already printed by dump_collection
@@ -3,7 +3,8 @@
3
3
  The generated file contains:
4
4
  1. A header comment with metadata
5
5
  2. CREATE COLLECTION <name> [HYBRID]
6
- 3. One INSERT BULK statement per batch of _DUMP_BATCH_SIZE points
6
+ 3. One INSERT BULK statement per batch of *batch_size* points
7
+ (default _DEFAULT_DUMP_BATCH_SIZE = 50, overridable via the CLI flag)
7
8
  4. A footer comment with totals
8
9
 
9
10
  The file is valid QQL and can be re-executed with ``qql execute <file>``.
@@ -20,7 +21,7 @@ from typing import Any
20
21
  from qdrant_client import QdrantClient
21
22
  from rich.console import Console
22
23
 
23
- _DUMP_BATCH_SIZE = 50
24
+ _DEFAULT_DUMP_BATCH_SIZE = 50
24
25
 
25
26
 
26
27
  # ── Value serializer ──────────────────────────────────────────────────────────
@@ -81,12 +82,16 @@ def dump_collection(
81
82
  client: QdrantClient,
82
83
  console: Console,
83
84
  err_console: Console,
85
+ batch_size: int = _DEFAULT_DUMP_BATCH_SIZE,
84
86
  ) -> tuple[int, int]:
85
87
  """Export every point in *collection* to a .qql script at *output_path*.
86
88
 
87
89
  Returns ``(points_written, points_skipped)`` counts.
88
90
  Points without a ``'text'`` key are skipped and counted in *points_skipped*.
89
91
  """
92
+ if batch_size <= 0:
93
+ raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
94
+
90
95
  if not client.collection_exists(collection):
91
96
  err_console.print(
92
97
  f"[bold red]Error:[/bold red] Collection '{collection}' does not exist."
@@ -100,13 +105,13 @@ def dump_collection(
100
105
  # ── First pass: count total points for the header ─────────────────────
101
106
  count_info = client.count(collection_name=collection, exact=True)
102
107
  total_points = count_info.count
103
- total_batches = max(1, math.ceil(total_points / _DUMP_BATCH_SIZE))
108
+ total_batches = max(1, math.ceil(total_points / batch_size))
104
109
 
105
110
  console.print(
106
111
  f" Collection type : [cyan]{col_type}[/cyan]\n"
107
112
  f" Points : [cyan]{total_points}[/cyan]\n"
108
113
  f" Batches : [cyan]{total_batches}[/cyan] "
109
- f"([dim]{_DUMP_BATCH_SIZE} points/batch[/dim])\n"
114
+ f"([dim]{batch_size} points/batch[/dim])\n"
110
115
  )
111
116
 
112
117
  out = Path(output_path)
@@ -140,7 +145,7 @@ def dump_collection(
140
145
  while True:
141
146
  records, next_offset = client.scroll(
142
147
  collection_name=collection,
143
- limit=_DUMP_BATCH_SIZE,
148
+ limit=batch_size,
144
149
  offset=offset,
145
150
  with_payload=True,
146
151
  with_vectors=False,
@@ -150,7 +155,7 @@ def dump_collection(
150
155
  break
151
156
 
152
157
  batch_num += 1
153
- batch_start = (batch_num - 1) * _DUMP_BATCH_SIZE + 1
158
+ batch_start = (batch_num - 1) * batch_size + 1
154
159
  batch_end = batch_start + len(records) - 1
155
160
 
156
161
  # Filter points that have a 'text' field
@@ -41,6 +41,9 @@ from qdrant_client.models import (
41
41
  ScalarQuantization,
42
42
  ScalarQuantizationConfig,
43
43
  ScalarType,
44
+ TurboQuantBitSize,
45
+ TurboQuantization,
46
+ TurboQuantQuantizationConfig,
44
47
  SearchParams,
45
48
  SparseVector,
46
49
  SparseVectorParams,
@@ -81,6 +84,7 @@ from .config import QQLConfig
81
84
  from .embedder import CrossEncoderEmbedder, Embedder, SparseEmbedder
82
85
 
83
86
  _RERANK_FETCH_MULTIPLIER = 4
87
+ _HYBRID_PREFETCH_MULTIPLIER = 4
84
88
  _COLLECTION_VISIBILITY_TIMEOUT_SECONDS = 5.0
85
89
  _COLLECTION_VISIBILITY_POLL_SECONDS = 0.05
86
90
  from .exceptions import QQLRuntimeError
@@ -446,13 +450,13 @@ class Executor:
446
450
  Prefetch(
447
451
  query=dense_vector,
448
452
  using="dense",
449
- limit=node.limit * 4,
453
+ limit=node.limit * _HYBRID_PREFETCH_MULTIPLIER,
450
454
  params=search_params,
451
455
  ),
452
456
  Prefetch(
453
457
  query=sparse_vector,
454
458
  using="sparse",
455
- limit=node.limit * 4,
459
+ limit=node.limit * _HYBRID_PREFETCH_MULTIPLIER,
456
460
  params=search_params,
457
461
  ),
458
462
  ],
@@ -846,7 +850,7 @@ class Executor:
846
850
 
847
851
  def _build_quantization_config(
848
852
  self, qc: QuantizationConfig
849
- ) -> ScalarQuantization | BinaryQuantization | ProductQuantization:
853
+ ) -> ScalarQuantization | BinaryQuantization | ProductQuantization | TurboQuantization:
850
854
  """Convert a parsed QuantizationConfig to a Qdrant SDK quantization object."""
851
855
  if qc.type == QuantizationType.SCALAR:
852
856
  return ScalarQuantization(
@@ -867,6 +871,28 @@ class Executor:
867
871
  always_ram=qc.always_ram,
868
872
  )
869
873
  )
874
+ if qc.type == QuantizationType.TURBO:
875
+ _BITS_MAP: dict[float, TurboQuantBitSize] = {
876
+ 4.0: TurboQuantBitSize.BITS4,
877
+ 2.0: TurboQuantBitSize.BITS2,
878
+ 1.5: TurboQuantBitSize.BITS1_5,
879
+ 1.0: TurboQuantBitSize.BITS1,
880
+ }
881
+ if qc.turbo_bits is None:
882
+ bits_enum = None # user omitted BITS → preserve None, server applies default
883
+ elif qc.turbo_bits in _BITS_MAP:
884
+ bits_enum = _BITS_MAP[qc.turbo_bits]
885
+ else:
886
+ raise QQLRuntimeError(
887
+ f"Unsupported TURBO bit depth: {qc.turbo_bits}. "
888
+ f"Valid values: 1, 1.5, 2, 4"
889
+ )
890
+ return TurboQuantization(
891
+ turbo=TurboQuantQuantizationConfig(
892
+ bits=bits_enum,
893
+ always_ram=qc.always_ram,
894
+ )
895
+ )
870
896
  raise QQLRuntimeError(f"Unknown quantization type: {qc.type}")
871
897
 
872
898
  def _collection_is_hybrid(self, name: str) -> bool:
@@ -27,6 +27,8 @@ class TokenKind(Enum):
27
27
  QUANTILE = auto()
28
28
  ALWAYS = auto()
29
29
  RAM = auto()
30
+ TURBO = auto()
31
+ BITS = auto()
30
32
  CREATE = auto()
31
33
  INDEX = auto()
32
34
  ON = auto()
@@ -113,6 +115,8 @@ _KEYWORDS: dict[str, TokenKind] = {
113
115
  "QUANTILE": TokenKind.QUANTILE,
114
116
  "ALWAYS": TokenKind.ALWAYS,
115
117
  "RAM": TokenKind.RAM,
118
+ "TURBO": TokenKind.TURBO,
119
+ "BITS": TokenKind.BITS,
116
120
  "CREATE": TokenKind.CREATE,
117
121
  "INDEX": TokenKind.INDEX,
118
122
  "ON": TokenKind.ON,
@@ -209,7 +213,7 @@ class Lexer:
209
213
  tokens.append(Token(TokenKind.NOT_EQUALS, "!=", i))
210
214
  i += 2
211
215
  else:
212
- raise QQLSyntaxError(f"Unexpected character '!'", i)
216
+ raise QQLSyntaxError("Unexpected character '!'", i)
213
217
  elif ch == ">":
214
218
  if i + 1 < n and query[i + 1] == "=":
215
219
  tokens.append(Token(TokenKind.GTE, ">=", i))
@@ -248,8 +248,32 @@ class Parser:
248
248
  always_ram = True
249
249
  return QuantizationConfig(type=QuantizationType.PRODUCT, always_ram=always_ram)
250
250
 
251
+ if tok.kind == TokenKind.TURBO:
252
+ self._advance()
253
+ turbo_bits: float | None = None
254
+ always_ram = False
255
+ if self._peek().kind == TokenKind.BITS:
256
+ self._advance()
257
+ bits_tok = self._peek()
258
+ raw = float(self._parse_number())
259
+ if raw not in (1.0, 1.5, 2.0, 4.0):
260
+ raise QQLSyntaxError(
261
+ f"BITS must be one of 1, 1.5, 2, or 4 for TURBO quantization, got {raw}",
262
+ bits_tok.pos,
263
+ )
264
+ turbo_bits = raw
265
+ if self._peek().kind == TokenKind.ALWAYS:
266
+ self._advance()
267
+ self._expect(TokenKind.RAM)
268
+ always_ram = True
269
+ return QuantizationConfig(
270
+ type=QuantizationType.TURBO,
271
+ turbo_bits=turbo_bits,
272
+ always_ram=always_ram,
273
+ )
274
+
251
275
  raise QQLSyntaxError(
252
- f"Expected SCALAR, BINARY, or PRODUCT after QUANTIZE, got '{tok.value}'",
276
+ f"Expected SCALAR, BINARY, PRODUCT, or TURBO after QUANTIZE, got '{tok.value}'",
253
277
  tok.pos,
254
278
  )
255
279
 
@@ -5,7 +5,7 @@ import pytest
5
5
  from rich.console import Console
6
6
 
7
7
  from qql.dumper import (
8
- _DUMP_BATCH_SIZE,
8
+ _DEFAULT_DUMP_BATCH_SIZE,
9
9
  _is_hybrid,
10
10
  _serialize_dict,
11
11
  _serialize_value,
@@ -32,7 +32,7 @@ def _make_client(mocker, *, exists=True, hybrid=False, points=None, total=None):
32
32
  """Build a mock QdrantClient for dump tests.
33
33
 
34
34
  *points* is a list of payload dicts. scroll() returns them all in one
35
- batch when len(points) <= _DUMP_BATCH_SIZE, else two batches.
35
+ batch when len(points) <= _DEFAULT_DUMP_BATCH_SIZE, else two batches.
36
36
  """
37
37
  points = points or []
38
38
  client = mocker.MagicMock()
@@ -202,10 +202,10 @@ class TestDumpCollection:
202
202
  client.collection_exists.return_value = True
203
203
  client.get_collection.return_value.config.params.vectors = mocker.MagicMock(spec=[])
204
204
  cnt = mocker.MagicMock()
205
- cnt.count = _DUMP_BATCH_SIZE + 1
205
+ cnt.count = _DEFAULT_DUMP_BATCH_SIZE + 1
206
206
  client.count.return_value = cnt
207
207
 
208
- batch1 = [_make_record(mocker, {"text": f"doc {i}"}, f"id-{i}") for i in range(_DUMP_BATCH_SIZE)]
208
+ batch1 = [_make_record(mocker, {"text": f"doc {i}"}, f"id-{i}") for i in range(_DEFAULT_DUMP_BATCH_SIZE)]
209
209
  batch2 = [_make_record(mocker, {"text": "last doc"}, "id-last")]
210
210
  # First scroll call returns batch1 with a non-None offset; second returns batch2 + None
211
211
  client.scroll.side_effect = [
@@ -215,7 +215,7 @@ class TestDumpCollection:
215
215
 
216
216
  written, skipped = dump_collection("col", out, client, null_console(), null_console())
217
217
  content = (tmp_path / "dump.qql").read_text()
218
- assert written == _DUMP_BATCH_SIZE + 1
218
+ assert written == _DEFAULT_DUMP_BATCH_SIZE + 1
219
219
  assert content.count("INSERT BULK") == 2
220
220
 
221
221
  def test_header_contains_collection_name(self, tmp_path, mocker):
@@ -230,3 +230,37 @@ class TestDumpCollection:
230
230
  client = _make_client(mocker, points=[{"text": "x"}])
231
231
  dump_collection("col", out, client, null_console(), null_console())
232
232
  assert (tmp_path / "sub" / "dir" / "dump.qql").exists()
233
+
234
+ def test_custom_batch_size_splits_pages(self, tmp_path, mocker):
235
+ """A batch_size of 2 over 3 points should produce two INSERT BULK blocks."""
236
+ out = str(tmp_path / "dump.qql")
237
+ client = mocker.MagicMock()
238
+ client.collection_exists.return_value = True
239
+ client.get_collection.return_value.config.params.vectors = mocker.MagicMock(spec=[])
240
+ cnt = mocker.MagicMock()
241
+ cnt.count = 3
242
+ client.count.return_value = cnt
243
+
244
+ batch1 = [_make_record(mocker, {"text": f"doc {i}"}, f"id-{i}") for i in range(2)]
245
+ batch2 = [_make_record(mocker, {"text": "last"}, "id-last")]
246
+ client.scroll.side_effect = [
247
+ (batch1, "offset-1"),
248
+ (batch2, None),
249
+ ]
250
+
251
+ written, _ = dump_collection(
252
+ "col", out, client, null_console(), null_console(), batch_size=2
253
+ )
254
+ content = (tmp_path / "dump.qql").read_text()
255
+ assert written == 3
256
+ assert content.count("INSERT BULK") == 2
257
+ # client.scroll should have been called with limit=2
258
+ assert client.scroll.call_args_list[0].kwargs["limit"] == 2
259
+
260
+ def test_invalid_batch_size_raises(self, tmp_path, mocker):
261
+ out = str(tmp_path / "dump.qql")
262
+ client = _make_client(mocker, points=[{"text": "x"}])
263
+ with pytest.raises(ValueError):
264
+ dump_collection(
265
+ "col", out, client, null_console(), null_console(), batch_size=0
266
+ )
@@ -1640,3 +1640,110 @@ class TestQuantizeCreate:
1640
1640
  node = CreateCollectionStmt(collection="articles")
1641
1641
  result = executor.execute(node)
1642
1642
  assert "quantization" not in result.message
1643
+
1644
+
1645
+ class TestTurboQuantCreate:
1646
+ """Executor tests for QUANTIZE TURBO — verifies correct SDK objects are built."""
1647
+
1648
+ @pytest.fixture
1649
+ def executor(self, cfg, mock_client):
1650
+ return Executor(mock_client, cfg)
1651
+
1652
+ # ── TurboQuantization object is produced ──────────────────────────────
1653
+
1654
+ def test_turbo_passes_turbo_quantization(self, executor, mock_client):
1655
+ from qdrant_client.models import TurboQuantization
1656
+ node = CreateCollectionStmt(
1657
+ collection="articles",
1658
+ quantization=QuantizationConfig(type=QuantizationType.TURBO),
1659
+ )
1660
+ executor.execute(node)
1661
+ kw = mock_client.create_collection.call_args.kwargs
1662
+ assert isinstance(kw.get("quantization_config"), TurboQuantization)
1663
+
1664
+ def test_turbo_default_bits_is_none(self, executor, mock_client):
1665
+ """When BITS is omitted, bits must be None — preserving omission so the
1666
+ SDK/server applies its own default rather than QQL forcing BITS4."""
1667
+ node = CreateCollectionStmt(
1668
+ collection="articles",
1669
+ quantization=QuantizationConfig(type=QuantizationType.TURBO),
1670
+ )
1671
+ executor.execute(node)
1672
+ kw = mock_client.create_collection.call_args.kwargs
1673
+ assert kw["quantization_config"].turbo.bits is None
1674
+
1675
+ def test_turbo_bits2(self, executor, mock_client):
1676
+ from qdrant_client.models import TurboQuantBitSize
1677
+ node = CreateCollectionStmt(
1678
+ collection="articles",
1679
+ quantization=QuantizationConfig(type=QuantizationType.TURBO, turbo_bits=2.0),
1680
+ )
1681
+ executor.execute(node)
1682
+ kw = mock_client.create_collection.call_args.kwargs
1683
+ assert kw["quantization_config"].turbo.bits == TurboQuantBitSize.BITS2
1684
+
1685
+ def test_turbo_bits1_5(self, executor, mock_client):
1686
+ from qdrant_client.models import TurboQuantBitSize
1687
+ node = CreateCollectionStmt(
1688
+ collection="articles",
1689
+ quantization=QuantizationConfig(type=QuantizationType.TURBO, turbo_bits=1.5),
1690
+ )
1691
+ executor.execute(node)
1692
+ kw = mock_client.create_collection.call_args.kwargs
1693
+ assert kw["quantization_config"].turbo.bits == TurboQuantBitSize.BITS1_5
1694
+
1695
+ def test_turbo_bits1(self, executor, mock_client):
1696
+ from qdrant_client.models import TurboQuantBitSize
1697
+ node = CreateCollectionStmt(
1698
+ collection="articles",
1699
+ quantization=QuantizationConfig(type=QuantizationType.TURBO, turbo_bits=1.0),
1700
+ )
1701
+ executor.execute(node)
1702
+ kw = mock_client.create_collection.call_args.kwargs
1703
+ assert kw["quantization_config"].turbo.bits == TurboQuantBitSize.BITS1
1704
+
1705
+ def test_turbo_always_ram_true(self, executor, mock_client):
1706
+ node = CreateCollectionStmt(
1707
+ collection="articles",
1708
+ quantization=QuantizationConfig(type=QuantizationType.TURBO, always_ram=True),
1709
+ )
1710
+ executor.execute(node)
1711
+ kw = mock_client.create_collection.call_args.kwargs
1712
+ assert kw["quantization_config"].turbo.always_ram is True
1713
+
1714
+ def test_turbo_always_ram_false_by_default(self, executor, mock_client):
1715
+ node = CreateCollectionStmt(
1716
+ collection="articles",
1717
+ quantization=QuantizationConfig(type=QuantizationType.TURBO),
1718
+ )
1719
+ executor.execute(node)
1720
+ kw = mock_client.create_collection.call_args.kwargs
1721
+ assert kw["quantization_config"].turbo.always_ram is False
1722
+
1723
+ def test_turbo_hybrid_collection_has_both_configs(self, executor, mock_client):
1724
+ from qdrant_client.models import TurboQuantization
1725
+ node = CreateCollectionStmt(
1726
+ collection="articles",
1727
+ hybrid=True,
1728
+ quantization=QuantizationConfig(type=QuantizationType.TURBO),
1729
+ )
1730
+ executor.execute(node)
1731
+ kw = mock_client.create_collection.call_args.kwargs
1732
+ assert isinstance(kw.get("quantization_config"), TurboQuantization)
1733
+ assert "sparse_vectors_config" in kw
1734
+
1735
+ def test_turbo_result_message_includes_turbo(self, executor, mock_client):
1736
+ node = CreateCollectionStmt(
1737
+ collection="articles",
1738
+ quantization=QuantizationConfig(type=QuantizationType.TURBO),
1739
+ )
1740
+ result = executor.execute(node)
1741
+ assert "turbo" in result.message
1742
+
1743
+ def test_turbo_invalid_bits_at_executor_raises(self, executor, mock_client):
1744
+ """An unexpected turbo_bits value that bypasses parser validation must
1745
+ raise QQLRuntimeError explicitly instead of silently coercing to BITS4."""
1746
+ from qql.exceptions import QQLRuntimeError as QQLErr
1747
+ qc = QuantizationConfig(type=QuantizationType.TURBO, turbo_bits=3.0)
1748
+ with pytest.raises(QQLErr, match="Unsupported TURBO bit depth"):
1749
+ executor._build_quantization_config(qc)
@@ -1031,3 +1031,83 @@ class TestQuantizeCreate:
1031
1031
  def test_scalar_quantile_integer_above_one_raises(self):
1032
1032
  with pytest.raises(QQLSyntaxError):
1033
1033
  parse("CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 2")
1034
+
1035
+
1036
+ class TestTurboQuantCreate:
1037
+ """Parser tests for QUANTIZE TURBO [BITS n] [ALWAYS RAM]."""
1038
+
1039
+ # ── Default / no options ──────────────────────────────────────────────
1040
+
1041
+ def test_turbo_no_options(self):
1042
+ node = parse("CREATE COLLECTION articles QUANTIZE TURBO")
1043
+ assert node.quantization is not None
1044
+ assert node.quantization.type == QuantizationType.TURBO
1045
+ assert node.quantization.turbo_bits is None
1046
+ assert node.quantization.always_ram is False
1047
+
1048
+ # ── BITS variants ─────────────────────────────────────────────────────
1049
+
1050
+ def test_turbo_bits4(self):
1051
+ node = parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 4")
1052
+ assert node.quantization.type == QuantizationType.TURBO
1053
+ assert node.quantization.turbo_bits == 4.0
1054
+
1055
+ def test_turbo_bits2(self):
1056
+ node = parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 2")
1057
+ assert node.quantization.turbo_bits == 2.0
1058
+
1059
+ def test_turbo_bits1_5(self):
1060
+ node = parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 1.5")
1061
+ assert node.quantization.turbo_bits == 1.5
1062
+
1063
+ def test_turbo_bits1(self):
1064
+ node = parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 1")
1065
+ assert node.quantization.turbo_bits == 1.0
1066
+
1067
+ # ── ALWAYS RAM ────────────────────────────────────────────────────────
1068
+
1069
+ def test_turbo_always_ram_no_bits(self):
1070
+ node = parse("CREATE COLLECTION articles QUANTIZE TURBO ALWAYS RAM")
1071
+ assert node.quantization.type == QuantizationType.TURBO
1072
+ assert node.quantization.always_ram is True
1073
+ assert node.quantization.turbo_bits is None
1074
+
1075
+ def test_turbo_bits_and_always_ram(self):
1076
+ node = parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 2 ALWAYS RAM")
1077
+ assert node.quantization.turbo_bits == 2.0
1078
+ assert node.quantization.always_ram is True
1079
+
1080
+ # ── Composed with other clauses ───────────────────────────────────────
1081
+
1082
+ def test_turbo_with_hybrid_shorthand(self):
1083
+ node = parse("CREATE COLLECTION articles HYBRID QUANTIZE TURBO")
1084
+ assert node.hybrid is True
1085
+ assert node.quantization.type == QuantizationType.TURBO
1086
+
1087
+ def test_turbo_with_using_hybrid(self):
1088
+ node = parse("CREATE COLLECTION articles USING HYBRID QUANTIZE TURBO BITS 2")
1089
+ assert node.hybrid is True
1090
+ assert node.quantization.turbo_bits == 2.0
1091
+
1092
+ def test_turbo_with_model(self):
1093
+ node = parse("CREATE COLLECTION articles USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE TURBO BITS 1.5")
1094
+ assert node.model == "BAAI/bge-base-en-v1.5"
1095
+ assert node.quantization.type == QuantizationType.TURBO
1096
+ assert node.quantization.turbo_bits == 1.5
1097
+
1098
+ def test_turbo_with_hybrid_dense_model(self):
1099
+ node = parse("CREATE COLLECTION articles USING HYBRID DENSE MODEL 'x' QUANTIZE TURBO BITS 1 ALWAYS RAM")
1100
+ assert node.hybrid is True
1101
+ assert node.model == "x"
1102
+ assert node.quantization.turbo_bits == 1.0
1103
+ assert node.quantization.always_ram is True
1104
+
1105
+ # ── Error cases ───────────────────────────────────────────────────────
1106
+
1107
+ def test_turbo_invalid_bits_raises(self):
1108
+ with pytest.raises(QQLSyntaxError):
1109
+ parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 3")
1110
+
1111
+ def test_turbo_invalid_bits_float_raises(self):
1112
+ with pytest.raises(QQLSyntaxError):
1113
+ parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 0.5")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes