qql-cli 2.0.0__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {qql_cli-2.0.0 → qql_cli-2.1.0}/PKG-INFO +8 -5
- {qql_cli-2.0.0 → qql_cli-2.1.0}/README.md +5 -2
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/collections.md +52 -14
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/scripts.md +3 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/pyproject.toml +3 -3
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/ast_nodes.py +4 -2
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/cli.py +11 -2
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/dumper.py +11 -6
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/executor.py +29 -3
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/lexer.py +5 -1
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/parser.py +25 -1
- {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_dumper.py +39 -5
- {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_executor.py +107 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_parser.py +80 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/.github/workflows/ci.yml +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/.github/workflows/publish.yml +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/.gitignore +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/LICENSE +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/_config.yml +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/filters.md +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/getting-started.md +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/index.html +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/insert.md +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/programmatic.md +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/reference.md +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/robots.txt +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/search.md +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/docs/sitemap.xml +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/main.py +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/resources/Features.md +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/resources/sample.qql +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/resources/sample_v2.qql +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/__init__.py +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/config.py +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/embedder.py +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/exceptions.py +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/src/qql/script.py +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/__init__.py +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_lexer.py +0 -0
- {qql_cli-2.0.0 → qql_cli-2.1.0}/tests/test_script.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: qql-cli
|
|
3
|
-
Version: 2.0.0
|
|
4
|
-
Summary: QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), WHERE clause filters, script execution, and collection dump/restore.
|
|
3
|
+
Version: 2.1.0
|
|
4
|
+
Summary: QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), WHERE clause filters, script execution, and collection dump/restore.
|
|
5
5
|
Project-URL: Homepage, https://github.com/pavanjava/qql
|
|
6
6
|
Project-URL: Repository, https://github.com/pavanjava/qql
|
|
7
7
|
Project-URL: Documentation, https://pavanjava.github.io/qql
|
|
@@ -45,7 +45,7 @@ Classifier: Topic :: Utilities
|
|
|
45
45
|
Requires-Python: >=3.12
|
|
46
46
|
Requires-Dist: click>=8.1.0
|
|
47
47
|
Requires-Dist: prompt-toolkit>=3.0.0
|
|
48
|
-
Requires-Dist: qdrant-client[fastembed]>=1.
|
|
48
|
+
Requires-Dist: qdrant-client[fastembed]>=1.18.0
|
|
49
49
|
Requires-Dist: rich>=13.0.0
|
|
50
50
|
Description-Content-Type: text/markdown
|
|
51
51
|
|
|
@@ -58,7 +58,7 @@ Description-Content-Type: text/markdown
|
|
|
58
58
|
[](LICENSE)
|
|
59
59
|
[](tests/)
|
|
60
60
|
|
|
61
|
-
Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
|
|
61
|
+
Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
|
|
62
62
|
|
|
63
63
|
```
|
|
64
64
|
qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
|
|
@@ -135,7 +135,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith
|
|
|
135
135
|
| [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types |
|
|
136
136
|
| [SEARCH / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, hybrid, reranking, recommendations |
|
|
137
137
|
| [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
|
|
138
|
-
| [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/binary/product), CREATE INDEX |
|
|
138
|
+
| [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX |
|
|
139
139
|
| [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
|
|
140
140
|
| [Programmatic Usage](docs/programmatic.md) | Use QQL as a Python library |
|
|
141
141
|
| [Reference: Models / Config / Errors](docs/reference.md) | Embedding models, config file, error reference |
|
|
@@ -162,6 +162,9 @@ RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
|
|
|
162
162
|
CREATE COLLECTION articles
|
|
163
163
|
CREATE COLLECTION articles HYBRID
|
|
164
164
|
CREATE COLLECTION articles QUANTIZE SCALAR
|
|
165
|
+
CREATE COLLECTION articles QUANTIZE TURBO
|
|
166
|
+
CREATE COLLECTION articles QUANTIZE TURBO BITS 2
|
|
167
|
+
CREATE COLLECTION articles QUANTIZE TURBO BITS 1.5 ALWAYS RAM
|
|
165
168
|
CREATE INDEX ON COLLECTION articles FOR year TYPE integer
|
|
166
169
|
SHOW COLLECTIONS
|
|
167
170
|
DROP COLLECTION articles
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
[](LICENSE)
|
|
8
8
|
[](tests/)
|
|
9
9
|
|
|
10
|
-
Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
|
|
10
|
+
Write `INSERT`, `SEARCH`, `RECOMMEND`, `DELETE`, and `CREATE COLLECTION` statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), SQL-style `WHERE` filters, script execution, and collection dump/restore.
|
|
11
11
|
|
|
12
12
|
```
|
|
13
13
|
qql> INSERT INTO COLLECTION notes VALUES {'text': 'Qdrant is a vector database', 'author': 'alice', 'year': 2024}
|
|
@@ -84,7 +84,7 @@ Full documentation lives in the [`docs/`](docs/) folder and at **[pavanjava.gith
|
|
|
84
84
|
| [INSERT / INSERT BULK](docs/insert.md) | Adding documents, batch inserts, payload types |
|
|
85
85
|
| [SEARCH / RECOMMEND / Hybrid / RERANK](docs/search.md) | Semantic search, hybrid, reranking, recommendations |
|
|
86
86
|
| [WHERE Filters](docs/filters.md) | Full SQL-style filter operators |
|
|
87
|
-
| [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/binary/product), CREATE INDEX |
|
|
87
|
+
| [Collections & Quantization](docs/collections.md) | CREATE, DROP, QUANTIZE (scalar/turbo/binary/product), CREATE INDEX |
|
|
88
88
|
| [Scripts: EXECUTE / DUMP](docs/scripts.md) | Script files, collection backup/restore |
|
|
89
89
|
| [Programmatic Usage](docs/programmatic.md) | Use QQL as a Python library |
|
|
90
90
|
| [Reference: Models / Config / Errors](docs/reference.md) | Embedding models, config file, error reference |
|
|
@@ -111,6 +111,9 @@ RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
|
|
|
111
111
|
CREATE COLLECTION articles
|
|
112
112
|
CREATE COLLECTION articles HYBRID
|
|
113
113
|
CREATE COLLECTION articles QUANTIZE SCALAR
|
|
114
|
+
CREATE COLLECTION articles QUANTIZE TURBO
|
|
115
|
+
CREATE COLLECTION articles QUANTIZE TURBO BITS 2
|
|
116
|
+
CREATE COLLECTION articles QUANTIZE TURBO BITS 1.5 ALWAYS RAM
|
|
114
117
|
CREATE INDEX ON COLLECTION articles FOR year TYPE integer
|
|
115
118
|
SHOW COLLECTIONS
|
|
116
119
|
DROP COLLECTION articles
|
|
@@ -67,27 +67,38 @@ When `USING MODEL` is omitted, the collection uses the **default embedding model
|
|
|
67
67
|
|
|
68
68
|
## Quantization — QUANTIZE clause
|
|
69
69
|
|
|
70
|
-
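Quantization reduces the memory footprint of vector collections and speeds up search at the cost of a small, controllable accuracy loss. QQL supports all three Qdrant quantization strategies via an optional `QUANTIZE` clause appended to `CREATE COLLECTION`.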
|
|
70
|
+
Quantization reduces the memory footprint of vector collections and speeds up search at the cost of a small, controllable accuracy loss. QQL supports all four Qdrant quantization strategies via an optional `QUANTIZE` clause appended to `CREATE COLLECTION`.
|
|
71
71
|
|
|
72
|
-
**Three strategies:**
|
|
72
|
+
**Four strategies:**
|
|
73
73
|
|
|
74
|
-
| Type | Compression | Accuracy
|
|
74
|
+
| Type | Compression | Accuracy | Best For |
|
|
75
75
|
|---|---|---|---|
|
|
76
|
-
| `SCALAR` | 4× (float32 → int8) | < 1% | Most collections — best balance |
|
|
77
|
-
| `
|
|
76
|
+
| `SCALAR` | 4× (float32 → int8) | < 1% loss | Most collections — best balance |
|
|
77
|
+
| `TURBO` | 8–32× (4-bit to 1-bit) | Low–medium | Better recall than BINARY at same storage budget |
|
|
78
|
+
| `BINARY` | 32× (float32 → 1-bit) | Higher loss | Speed priority; centered distributions only |
|
|
78
79
|
| `PRODUCT` | 4× (configurable) | Variable | Memory-constrained deployments |
|
|
79
80
|
|
|
80
81
|
**Full syntax:**
|
|
81
82
|
```
|
|
82
83
|
CREATE COLLECTION <name> ... QUANTIZE SCALAR [QUANTILE <0.0–1.0>] [ALWAYS RAM]
|
|
84
|
+
CREATE COLLECTION <name> ... QUANTIZE TURBO [BITS <1|1.5|2|4>] [ALWAYS RAM]
|
|
83
85
|
CREATE COLLECTION <name> ... QUANTIZE BINARY [ALWAYS RAM]
|
|
84
86
|
CREATE COLLECTION <name> ... QUANTIZE PRODUCT [ALWAYS RAM]
|
|
85
87
|
```
|
|
86
88
|
|
|
87
|
-
- **`QUANTILE <float>`** — (
|
|
88
|
-
- **`
|
|
89
|
+
- **`QUANTILE <float>`** — (SCALAR only) calibration quantile for the INT8 conversion; defaults to Qdrant's built-in default (0.99) when omitted.
|
|
90
|
+
- **`BITS <depth>`** — (TURBO only) bit depth passed to the Qdrant SDK:
|
|
91
|
+
- `4` — 4-bit (default when `BITS` is omitted; server applies its own default)
|
|
92
|
+
- `2` — 2-bit
|
|
93
|
+
- `1.5` — 1.5-bit
|
|
94
|
+
- `1` — 1-bit
|
|
95
|
+
> Compression ratios (8×, 16×, 24×, 32×) and recall characteristics are
|
|
96
|
+
> Qdrant server-side behaviors. QQL maps the `BITS` value to the SDK model and
|
|
97
|
+
> passes it to Qdrant; actual results depend on your Qdrant server version.
|
|
98
|
+
- **`ALWAYS RAM`** — keep the **quantized** vectors in RAM at all times, regardless of the collection's `on_disk` setting. Improves search throughput at the cost of higher RAM usage for the compressed index. The original full-precision vectors are stored and managed independently of this flag. Supported by all four quantization types.
|
|
89
99
|
- **`QUANTIZE`** always appears **after** all other clauses (`HYBRID`, `USING MODEL`, etc.).
|
|
90
100
|
- For `PRODUCT`, the compression ratio is fixed at **4×** in this version.
|
|
101
|
+
- For `TURBO`, Cosine, Dot, and Euclidean distance are supported by the Qdrant server when TurboQuant is enabled.
|
|
91
102
|
- When used with `HYBRID` collections, quantization applies only to the **dense** vector.
|
|
92
103
|
|
|
93
104
|
**Examples:**
|
|
@@ -102,6 +113,26 @@ Scalar with explicit calibration and quantized vectors pinned to RAM:
|
|
|
102
113
|
CREATE COLLECTION research_papers QUANTIZE SCALAR QUANTILE 0.95 ALWAYS RAM
|
|
103
114
|
```
|
|
104
115
|
|
|
116
|
+
TurboQuant — default 4-bit (8× compression, good recall):
|
|
117
|
+
```sql
|
|
118
|
+
CREATE COLLECTION research_papers QUANTIZE TURBO
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
TurboQuant — 2-bit (16× compression):
|
|
122
|
+
```sql
|
|
123
|
+
CREATE COLLECTION research_papers QUANTIZE TURBO BITS 2
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
TurboQuant — 1.5-bit (24× compression) with quantized vectors pinned to RAM:
|
|
127
|
+
```sql
|
|
128
|
+
CREATE COLLECTION research_papers QUANTIZE TURBO BITS 1.5 ALWAYS RAM
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
TurboQuant — 1-bit (32× compression, same ratio as BINARY but better recall):
|
|
132
|
+
```sql
|
|
133
|
+
CREATE COLLECTION research_papers QUANTIZE TURBO BITS 1
|
|
134
|
+
```
|
|
135
|
+
|
|
105
136
|
Binary quantization for large high-dimensional embeddings:
|
|
106
137
|
```sql
|
|
107
138
|
CREATE COLLECTION research_papers QUANTIZE BINARY
|
|
@@ -115,22 +146,29 @@ CREATE COLLECTION research_papers QUANTIZE PRODUCT ALWAYS RAM
|
|
|
115
146
|
Combined with hybrid collection:
|
|
116
147
|
```sql
|
|
117
148
|
CREATE COLLECTION research_papers HYBRID QUANTIZE SCALAR
|
|
149
|
+
CREATE COLLECTION research_papers HYBRID QUANTIZE TURBO BITS 2
|
|
118
150
|
```
|
|
119
151
|
|
|
120
152
|
Combined with a pinned model:
|
|
121
153
|
```sql
|
|
122
154
|
CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE SCALAR QUANTILE 0.99
|
|
155
|
+
CREATE COLLECTION research_papers USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE TURBO BITS 2
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Combined with hybrid + dense model:
|
|
159
|
+
```sql
|
|
160
|
+
CREATE COLLECTION research_papers USING HYBRID DENSE MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE TURBO
|
|
123
161
|
```
|
|
124
162
|
|
|
125
163
|
**Valid combinations:**
|
|
126
164
|
|
|
127
|
-
| Base form | +
|
|
128
|
-
|
|
129
|
-
| `CREATE COLLECTION name` | ✓ | ✓ | ✓ |
|
|
130
|
-
| `... HYBRID` | ✓ | ✓ | ✓ |
|
|
131
|
-
| `... USING MODEL 'x'` | ✓ | ✓ | ✓ |
|
|
132
|
-
| `... USING HYBRID` | ✓ | ✓ | ✓ |
|
|
133
|
-
| `... USING HYBRID DENSE MODEL 'x'` | ✓ | ✓ | ✓ |
|
|
165
|
+
| Base form | + SCALAR | + TURBO | + BINARY | + PRODUCT |
|
|
166
|
+
|---|---|---|---|---|
|
|
167
|
+
| `CREATE COLLECTION name` | ✓ | ✓ | ✓ | ✓ |
|
|
168
|
+
| `... HYBRID` | ✓ | ✓ | ✓ | ✓ |
|
|
169
|
+
| `... USING MODEL 'x'` | ✓ | ✓ | ✓ | ✓ |
|
|
170
|
+
| `... USING HYBRID` | ✓ | ✓ | ✓ | ✓ |
|
|
171
|
+
| `... USING HYBRID DENSE MODEL 'x'` | ✓ | ✓ | ✓ | ✓ |
|
|
134
172
|
|
|
135
173
|
> INSERT and SEARCH on quantized collections work exactly the same as on non-quantized ones — no changes to INSERT or SEARCH syntax are needed.
|
|
136
174
|
|
|
@@ -79,6 +79,9 @@ Export every point in a collection to a `.qql` script file. The generated file i
|
|
|
79
79
|
**CLI usage:**
|
|
80
80
|
```bash
|
|
81
81
|
qql dump <collection_name> <output.qql>
|
|
82
|
+
|
|
83
|
+
# Override the default 50 points/INSERT BULK batch
|
|
84
|
+
qql dump <collection_name> <output.qql> --batch-size 200
|
|
82
85
|
```
|
|
83
86
|
|
|
84
87
|
**In-shell usage (inside the QQL REPL):**
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "qql-cli"
|
|
3
|
-
version = "2.0.0"
|
|
4
|
-
description = "QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, binary, product), WHERE clause filters, script execution, and collection dump/restore."
|
|
3
|
+
version = "2.1.0"
|
|
4
|
+
description = "QQL is a SQL-like query language and CLI for Qdrant vector database. Write INSERT, SEARCH, RECOMMEND, DELETE, and CREATE COLLECTION statements instead of Python SDK calls. Supports hybrid dense+sparse vector search, cross-encoder reranking, quantization (scalar, turbo, binary, product), WHERE clause filters, script execution, and collection dump/restore."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { file = "LICENSE" }
|
|
7
7
|
requires-python = ">=3.12"
|
|
@@ -37,7 +37,7 @@ classifiers = [
|
|
|
37
37
|
"Topic :: Text Processing :: Indexing",
|
|
38
38
|
]
|
|
39
39
|
dependencies = [
|
|
40
|
-
"qdrant-client[fastembed]>=1.
|
|
40
|
+
"qdrant-client[fastembed]>=1.18.0",
|
|
41
41
|
"click>=8.1.0",
|
|
42
42
|
"rich>=13.0.0",
|
|
43
43
|
"prompt_toolkit>=3.0.0",
|
|
@@ -9,14 +9,16 @@ class QuantizationType(Enum):
|
|
|
9
9
|
SCALAR = "scalar"
|
|
10
10
|
BINARY = "binary"
|
|
11
11
|
PRODUCT = "product"
|
|
12
|
+
TURBO = "turbo"
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
@dataclass(frozen=True)
|
|
15
16
|
class QuantizationConfig:
|
|
16
17
|
"""Quantization settings parsed from a QUANTIZE clause."""
|
|
17
18
|
type: QuantizationType
|
|
18
|
-
quantile: float | None = None
|
|
19
|
-
always_ram: bool = False
|
|
19
|
+
quantile: float | None = None # SCALAR only; None → Qdrant default (0.99)
|
|
20
|
+
always_ram: bool = False # all types; default False
|
|
21
|
+
turbo_bits: float | None = None # TURBO only; None → bits4 (Qdrant default 4-bit, 8×)
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
@dataclass(frozen=True)
|
|
@@ -201,7 +201,14 @@ def execute(file: str, stop_on_error: bool) -> None:
|
|
|
201
201
|
@main.command()
|
|
202
202
|
@click.argument("collection")
|
|
203
203
|
@click.argument("output", type=click.Path())
|
|
204
|
-
|
|
204
|
+
@click.option(
|
|
205
|
+
"--batch-size",
|
|
206
|
+
type=click.IntRange(min=1),
|
|
207
|
+
default=50,
|
|
208
|
+
show_default=True,
|
|
209
|
+
help="Points per INSERT BULK batch in the generated script.",
|
|
210
|
+
)
|
|
211
|
+
def dump(collection: str, output: str, batch_size: int) -> None:
|
|
205
212
|
"""Dump a collection to a .qql script file.
|
|
206
213
|
|
|
207
214
|
OUTPUT is the path for the generated .qql file.
|
|
@@ -230,7 +237,9 @@ def dump(collection: str, output: str) -> None:
|
|
|
230
237
|
console.print(
|
|
231
238
|
f"[bold cyan]Dumping:[/bold cyan] '{collection}' -> {output}\n"
|
|
232
239
|
)
|
|
233
|
-
written, skipped = dump_collection(
|
|
240
|
+
written, skipped = dump_collection(
|
|
241
|
+
collection, output, client, console, err_console, batch_size=batch_size
|
|
242
|
+
)
|
|
234
243
|
|
|
235
244
|
if written == 0 and skipped == 0:
|
|
236
245
|
# collection not found — error already printed by dump_collection
|
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
The generated file contains:
|
|
4
4
|
1. A header comment with metadata
|
|
5
5
|
2. CREATE COLLECTION <name> [HYBRID]
|
|
6
|
-
3. One INSERT BULK statement per batch of
|
|
6
|
+
3. One INSERT BULK statement per batch of *batch_size* points
|
|
7
|
+
(default _DEFAULT_DUMP_BATCH_SIZE = 50, overridable via the CLI flag)
|
|
7
8
|
4. A footer comment with totals
|
|
8
9
|
|
|
9
10
|
The file is valid QQL and can be re-executed with ``qql execute <file>``.
|
|
@@ -20,7 +21,7 @@ from typing import Any
|
|
|
20
21
|
from qdrant_client import QdrantClient
|
|
21
22
|
from rich.console import Console
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
_DEFAULT_DUMP_BATCH_SIZE = 50
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
# ── Value serializer ──────────────────────────────────────────────────────────
|
|
@@ -81,12 +82,16 @@ def dump_collection(
|
|
|
81
82
|
client: QdrantClient,
|
|
82
83
|
console: Console,
|
|
83
84
|
err_console: Console,
|
|
85
|
+
batch_size: int = _DEFAULT_DUMP_BATCH_SIZE,
|
|
84
86
|
) -> tuple[int, int]:
|
|
85
87
|
"""Export every point in *collection* to a .qql script at *output_path*.
|
|
86
88
|
|
|
87
89
|
Returns ``(points_written, points_skipped)`` counts.
|
|
88
90
|
Points without a ``'text'`` key are skipped and counted in *points_skipped*.
|
|
89
91
|
"""
|
|
92
|
+
if batch_size <= 0:
|
|
93
|
+
raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
|
|
94
|
+
|
|
90
95
|
if not client.collection_exists(collection):
|
|
91
96
|
err_console.print(
|
|
92
97
|
f"[bold red]Error:[/bold red] Collection '{collection}' does not exist."
|
|
@@ -100,13 +105,13 @@ def dump_collection(
|
|
|
100
105
|
# ── First pass: count total points for the header ─────────────────────
|
|
101
106
|
count_info = client.count(collection_name=collection, exact=True)
|
|
102
107
|
total_points = count_info.count
|
|
103
|
-
total_batches = max(1, math.ceil(total_points /
|
|
108
|
+
total_batches = max(1, math.ceil(total_points / batch_size))
|
|
104
109
|
|
|
105
110
|
console.print(
|
|
106
111
|
f" Collection type : [cyan]{col_type}[/cyan]\n"
|
|
107
112
|
f" Points : [cyan]{total_points}[/cyan]\n"
|
|
108
113
|
f" Batches : [cyan]{total_batches}[/cyan] "
|
|
109
|
-
f"([dim]{
|
|
114
|
+
f"([dim]{batch_size} points/batch[/dim])\n"
|
|
110
115
|
)
|
|
111
116
|
|
|
112
117
|
out = Path(output_path)
|
|
@@ -140,7 +145,7 @@ def dump_collection(
|
|
|
140
145
|
while True:
|
|
141
146
|
records, next_offset = client.scroll(
|
|
142
147
|
collection_name=collection,
|
|
143
|
-
limit=
|
|
148
|
+
limit=batch_size,
|
|
144
149
|
offset=offset,
|
|
145
150
|
with_payload=True,
|
|
146
151
|
with_vectors=False,
|
|
@@ -150,7 +155,7 @@ def dump_collection(
|
|
|
150
155
|
break
|
|
151
156
|
|
|
152
157
|
batch_num += 1
|
|
153
|
-
batch_start = (batch_num - 1) *
|
|
158
|
+
batch_start = (batch_num - 1) * batch_size + 1
|
|
154
159
|
batch_end = batch_start + len(records) - 1
|
|
155
160
|
|
|
156
161
|
# Filter points that have a 'text' field
|
|
@@ -41,6 +41,9 @@ from qdrant_client.models import (
|
|
|
41
41
|
ScalarQuantization,
|
|
42
42
|
ScalarQuantizationConfig,
|
|
43
43
|
ScalarType,
|
|
44
|
+
TurboQuantBitSize,
|
|
45
|
+
TurboQuantization,
|
|
46
|
+
TurboQuantQuantizationConfig,
|
|
44
47
|
SearchParams,
|
|
45
48
|
SparseVector,
|
|
46
49
|
SparseVectorParams,
|
|
@@ -81,6 +84,7 @@ from .config import QQLConfig
|
|
|
81
84
|
from .embedder import CrossEncoderEmbedder, Embedder, SparseEmbedder
|
|
82
85
|
|
|
83
86
|
_RERANK_FETCH_MULTIPLIER = 4
|
|
87
|
+
_HYBRID_PREFETCH_MULTIPLIER = 4
|
|
84
88
|
_COLLECTION_VISIBILITY_TIMEOUT_SECONDS = 5.0
|
|
85
89
|
_COLLECTION_VISIBILITY_POLL_SECONDS = 0.05
|
|
86
90
|
from .exceptions import QQLRuntimeError
|
|
@@ -446,13 +450,13 @@ class Executor:
|
|
|
446
450
|
Prefetch(
|
|
447
451
|
query=dense_vector,
|
|
448
452
|
using="dense",
|
|
449
|
-
limit=node.limit *
|
|
453
|
+
limit=node.limit * _HYBRID_PREFETCH_MULTIPLIER,
|
|
450
454
|
params=search_params,
|
|
451
455
|
),
|
|
452
456
|
Prefetch(
|
|
453
457
|
query=sparse_vector,
|
|
454
458
|
using="sparse",
|
|
455
|
-
limit=node.limit *
|
|
459
|
+
limit=node.limit * _HYBRID_PREFETCH_MULTIPLIER,
|
|
456
460
|
params=search_params,
|
|
457
461
|
),
|
|
458
462
|
],
|
|
@@ -846,7 +850,7 @@ class Executor:
|
|
|
846
850
|
|
|
847
851
|
def _build_quantization_config(
|
|
848
852
|
self, qc: QuantizationConfig
|
|
849
|
-
) -> ScalarQuantization | BinaryQuantization | ProductQuantization:
|
|
853
|
+
) -> ScalarQuantization | BinaryQuantization | ProductQuantization | TurboQuantization:
|
|
850
854
|
"""Convert a parsed QuantizationConfig to a Qdrant SDK quantization object."""
|
|
851
855
|
if qc.type == QuantizationType.SCALAR:
|
|
852
856
|
return ScalarQuantization(
|
|
@@ -867,6 +871,28 @@ class Executor:
|
|
|
867
871
|
always_ram=qc.always_ram,
|
|
868
872
|
)
|
|
869
873
|
)
|
|
874
|
+
if qc.type == QuantizationType.TURBO:
|
|
875
|
+
_BITS_MAP: dict[float, TurboQuantBitSize] = {
|
|
876
|
+
4.0: TurboQuantBitSize.BITS4,
|
|
877
|
+
2.0: TurboQuantBitSize.BITS2,
|
|
878
|
+
1.5: TurboQuantBitSize.BITS1_5,
|
|
879
|
+
1.0: TurboQuantBitSize.BITS1,
|
|
880
|
+
}
|
|
881
|
+
if qc.turbo_bits is None:
|
|
882
|
+
bits_enum = None # user omitted BITS → preserve None, server applies default
|
|
883
|
+
elif qc.turbo_bits in _BITS_MAP:
|
|
884
|
+
bits_enum = _BITS_MAP[qc.turbo_bits]
|
|
885
|
+
else:
|
|
886
|
+
raise QQLRuntimeError(
|
|
887
|
+
f"Unsupported TURBO bit depth: {qc.turbo_bits}. "
|
|
888
|
+
f"Valid values: 1, 1.5, 2, 4"
|
|
889
|
+
)
|
|
890
|
+
return TurboQuantization(
|
|
891
|
+
turbo=TurboQuantQuantizationConfig(
|
|
892
|
+
bits=bits_enum,
|
|
893
|
+
always_ram=qc.always_ram,
|
|
894
|
+
)
|
|
895
|
+
)
|
|
870
896
|
raise QQLRuntimeError(f"Unknown quantization type: {qc.type}")
|
|
871
897
|
|
|
872
898
|
def _collection_is_hybrid(self, name: str) -> bool:
|
|
@@ -27,6 +27,8 @@ class TokenKind(Enum):
|
|
|
27
27
|
QUANTILE = auto()
|
|
28
28
|
ALWAYS = auto()
|
|
29
29
|
RAM = auto()
|
|
30
|
+
TURBO = auto()
|
|
31
|
+
BITS = auto()
|
|
30
32
|
CREATE = auto()
|
|
31
33
|
INDEX = auto()
|
|
32
34
|
ON = auto()
|
|
@@ -113,6 +115,8 @@ _KEYWORDS: dict[str, TokenKind] = {
|
|
|
113
115
|
"QUANTILE": TokenKind.QUANTILE,
|
|
114
116
|
"ALWAYS": TokenKind.ALWAYS,
|
|
115
117
|
"RAM": TokenKind.RAM,
|
|
118
|
+
"TURBO": TokenKind.TURBO,
|
|
119
|
+
"BITS": TokenKind.BITS,
|
|
116
120
|
"CREATE": TokenKind.CREATE,
|
|
117
121
|
"INDEX": TokenKind.INDEX,
|
|
118
122
|
"ON": TokenKind.ON,
|
|
@@ -209,7 +213,7 @@ class Lexer:
|
|
|
209
213
|
tokens.append(Token(TokenKind.NOT_EQUALS, "!=", i))
|
|
210
214
|
i += 2
|
|
211
215
|
else:
|
|
212
|
-
raise QQLSyntaxError(
|
|
216
|
+
raise QQLSyntaxError("Unexpected character '!'", i)
|
|
213
217
|
elif ch == ">":
|
|
214
218
|
if i + 1 < n and query[i + 1] == "=":
|
|
215
219
|
tokens.append(Token(TokenKind.GTE, ">=", i))
|
|
@@ -248,8 +248,32 @@ class Parser:
|
|
|
248
248
|
always_ram = True
|
|
249
249
|
return QuantizationConfig(type=QuantizationType.PRODUCT, always_ram=always_ram)
|
|
250
250
|
|
|
251
|
+
if tok.kind == TokenKind.TURBO:
|
|
252
|
+
self._advance()
|
|
253
|
+
turbo_bits: float | None = None
|
|
254
|
+
always_ram = False
|
|
255
|
+
if self._peek().kind == TokenKind.BITS:
|
|
256
|
+
self._advance()
|
|
257
|
+
bits_tok = self._peek()
|
|
258
|
+
raw = float(self._parse_number())
|
|
259
|
+
if raw not in (1.0, 1.5, 2.0, 4.0):
|
|
260
|
+
raise QQLSyntaxError(
|
|
261
|
+
f"BITS must be one of 1, 1.5, 2, or 4 for TURBO quantization, got {raw}",
|
|
262
|
+
bits_tok.pos,
|
|
263
|
+
)
|
|
264
|
+
turbo_bits = raw
|
|
265
|
+
if self._peek().kind == TokenKind.ALWAYS:
|
|
266
|
+
self._advance()
|
|
267
|
+
self._expect(TokenKind.RAM)
|
|
268
|
+
always_ram = True
|
|
269
|
+
return QuantizationConfig(
|
|
270
|
+
type=QuantizationType.TURBO,
|
|
271
|
+
turbo_bits=turbo_bits,
|
|
272
|
+
always_ram=always_ram,
|
|
273
|
+
)
|
|
274
|
+
|
|
251
275
|
raise QQLSyntaxError(
|
|
252
|
-
f"Expected SCALAR, BINARY, or
|
|
276
|
+
f"Expected SCALAR, BINARY, PRODUCT, or TURBO after QUANTIZE, got '{tok.value}'",
|
|
253
277
|
tok.pos,
|
|
254
278
|
)
|
|
255
279
|
|
|
@@ -5,7 +5,7 @@ import pytest
|
|
|
5
5
|
from rich.console import Console
|
|
6
6
|
|
|
7
7
|
from qql.dumper import (
|
|
8
|
-
|
|
8
|
+
_DEFAULT_DUMP_BATCH_SIZE,
|
|
9
9
|
_is_hybrid,
|
|
10
10
|
_serialize_dict,
|
|
11
11
|
_serialize_value,
|
|
@@ -32,7 +32,7 @@ def _make_client(mocker, *, exists=True, hybrid=False, points=None, total=None):
|
|
|
32
32
|
"""Build a mock QdrantClient for dump tests.
|
|
33
33
|
|
|
34
34
|
*points* is a list of payload dicts. scroll() returns them all in one
|
|
35
|
-
batch when len(points) <=
|
|
35
|
+
batch when len(points) <= _DEFAULT_DUMP_BATCH_SIZE, else two batches.
|
|
36
36
|
"""
|
|
37
37
|
points = points or []
|
|
38
38
|
client = mocker.MagicMock()
|
|
@@ -202,10 +202,10 @@ class TestDumpCollection:
|
|
|
202
202
|
client.collection_exists.return_value = True
|
|
203
203
|
client.get_collection.return_value.config.params.vectors = mocker.MagicMock(spec=[])
|
|
204
204
|
cnt = mocker.MagicMock()
|
|
205
|
-
cnt.count =
|
|
205
|
+
cnt.count = _DEFAULT_DUMP_BATCH_SIZE + 1
|
|
206
206
|
client.count.return_value = cnt
|
|
207
207
|
|
|
208
|
-
batch1 = [_make_record(mocker, {"text": f"doc {i}"}, f"id-{i}") for i in range(
|
|
208
|
+
batch1 = [_make_record(mocker, {"text": f"doc {i}"}, f"id-{i}") for i in range(_DEFAULT_DUMP_BATCH_SIZE)]
|
|
209
209
|
batch2 = [_make_record(mocker, {"text": "last doc"}, "id-last")]
|
|
210
210
|
# First scroll call returns batch1 with a non-None offset; second returns batch2 + None
|
|
211
211
|
client.scroll.side_effect = [
|
|
@@ -215,7 +215,7 @@ class TestDumpCollection:
|
|
|
215
215
|
|
|
216
216
|
written, skipped = dump_collection("col", out, client, null_console(), null_console())
|
|
217
217
|
content = (tmp_path / "dump.qql").read_text()
|
|
218
|
-
assert written ==
|
|
218
|
+
assert written == _DEFAULT_DUMP_BATCH_SIZE + 1
|
|
219
219
|
assert content.count("INSERT BULK") == 2
|
|
220
220
|
|
|
221
221
|
def test_header_contains_collection_name(self, tmp_path, mocker):
|
|
@@ -230,3 +230,37 @@ class TestDumpCollection:
|
|
|
230
230
|
client = _make_client(mocker, points=[{"text": "x"}])
|
|
231
231
|
dump_collection("col", out, client, null_console(), null_console())
|
|
232
232
|
assert (tmp_path / "sub" / "dir" / "dump.qql").exists()
|
|
233
|
+
|
|
234
|
+
def test_custom_batch_size_splits_pages(self, tmp_path, mocker):
|
|
235
|
+
"""A batch_size of 2 over 3 points should produce two INSERT BULK blocks."""
|
|
236
|
+
out = str(tmp_path / "dump.qql")
|
|
237
|
+
client = mocker.MagicMock()
|
|
238
|
+
client.collection_exists.return_value = True
|
|
239
|
+
client.get_collection.return_value.config.params.vectors = mocker.MagicMock(spec=[])
|
|
240
|
+
cnt = mocker.MagicMock()
|
|
241
|
+
cnt.count = 3
|
|
242
|
+
client.count.return_value = cnt
|
|
243
|
+
|
|
244
|
+
batch1 = [_make_record(mocker, {"text": f"doc {i}"}, f"id-{i}") for i in range(2)]
|
|
245
|
+
batch2 = [_make_record(mocker, {"text": "last"}, "id-last")]
|
|
246
|
+
client.scroll.side_effect = [
|
|
247
|
+
(batch1, "offset-1"),
|
|
248
|
+
(batch2, None),
|
|
249
|
+
]
|
|
250
|
+
|
|
251
|
+
written, _ = dump_collection(
|
|
252
|
+
"col", out, client, null_console(), null_console(), batch_size=2
|
|
253
|
+
)
|
|
254
|
+
content = (tmp_path / "dump.qql").read_text()
|
|
255
|
+
assert written == 3
|
|
256
|
+
assert content.count("INSERT BULK") == 2
|
|
257
|
+
# client.scroll should have been called with limit=2
|
|
258
|
+
assert client.scroll.call_args_list[0].kwargs["limit"] == 2
|
|
259
|
+
|
|
260
|
+
def test_invalid_batch_size_raises(self, tmp_path, mocker):
|
|
261
|
+
out = str(tmp_path / "dump.qql")
|
|
262
|
+
client = _make_client(mocker, points=[{"text": "x"}])
|
|
263
|
+
with pytest.raises(ValueError):
|
|
264
|
+
dump_collection(
|
|
265
|
+
"col", out, client, null_console(), null_console(), batch_size=0
|
|
266
|
+
)
|
|
@@ -1640,3 +1640,110 @@ class TestQuantizeCreate:
|
|
|
1640
1640
|
node = CreateCollectionStmt(collection="articles")
|
|
1641
1641
|
result = executor.execute(node)
|
|
1642
1642
|
assert "quantization" not in result.message
|
|
1643
|
+
|
|
1644
|
+
|
|
1645
|
+
class TestTurboQuantCreate:
    """Executor tests for QUANTIZE TURBO — verifies correct SDK objects are built."""

    @pytest.fixture
    def executor(self, cfg, mock_client):
        """Executor under test, wired to the mocked client."""
        return Executor(mock_client, cfg)

    @staticmethod
    def _turbo_stmt(**turbo_options):
        """Build a CREATE COLLECTION statement for "articles" using TURBO quantization.

        Any keyword arguments are forwarded to ``QuantizationConfig`` next to
        ``type=QuantizationType.TURBO``.
        """
        return CreateCollectionStmt(
            collection="articles",
            quantization=QuantizationConfig(
                type=QuantizationType.TURBO, **turbo_options
            ),
        )

    # ── TurboQuantization object is produced ──────────────────────────────

    def test_turbo_passes_turbo_quantization(self, executor, mock_client):
        """The client receives a ``TurboQuantization`` as ``quantization_config``."""
        from qdrant_client.models import TurboQuantization

        executor.execute(self._turbo_stmt())
        create_kwargs = mock_client.create_collection.call_args.kwargs
        assert isinstance(create_kwargs.get("quantization_config"), TurboQuantization)

    def test_turbo_default_bits_is_none(self, executor, mock_client):
        """With BITS omitted, ``bits`` stays None so the SDK/server picks its
        own default instead of QQL forcing BITS4."""
        executor.execute(self._turbo_stmt())
        create_kwargs = mock_client.create_collection.call_args.kwargs
        assert create_kwargs["quantization_config"].turbo.bits is None

    def test_turbo_bits2(self, executor, mock_client):
        """``turbo_bits=2.0`` is passed on as ``TurboQuantBitSize.BITS2``."""
        from qdrant_client.models import TurboQuantBitSize

        executor.execute(self._turbo_stmt(turbo_bits=2.0))
        create_kwargs = mock_client.create_collection.call_args.kwargs
        assert create_kwargs["quantization_config"].turbo.bits == TurboQuantBitSize.BITS2

    def test_turbo_bits1_5(self, executor, mock_client):
        """``turbo_bits=1.5`` is passed on as ``TurboQuantBitSize.BITS1_5``."""
        from qdrant_client.models import TurboQuantBitSize

        executor.execute(self._turbo_stmt(turbo_bits=1.5))
        create_kwargs = mock_client.create_collection.call_args.kwargs
        assert create_kwargs["quantization_config"].turbo.bits == TurboQuantBitSize.BITS1_5

    def test_turbo_bits1(self, executor, mock_client):
        """``turbo_bits=1.0`` is passed on as ``TurboQuantBitSize.BITS1``."""
        from qdrant_client.models import TurboQuantBitSize

        executor.execute(self._turbo_stmt(turbo_bits=1.0))
        create_kwargs = mock_client.create_collection.call_args.kwargs
        assert create_kwargs["quantization_config"].turbo.bits == TurboQuantBitSize.BITS1

    def test_turbo_always_ram_true(self, executor, mock_client):
        """``always_ram=True`` on the config reaches the SDK object unchanged."""
        executor.execute(self._turbo_stmt(always_ram=True))
        create_kwargs = mock_client.create_collection.call_args.kwargs
        assert create_kwargs["quantization_config"].turbo.always_ram is True

    def test_turbo_always_ram_false_by_default(self, executor, mock_client):
        """Without an explicit ``always_ram`` the SDK object carries False."""
        executor.execute(self._turbo_stmt())
        create_kwargs = mock_client.create_collection.call_args.kwargs
        assert create_kwargs["quantization_config"].turbo.always_ram is False

    def test_turbo_hybrid_collection_has_both_configs(self, executor, mock_client):
        """A hybrid statement sends both the turbo and the sparse-vector config."""
        from qdrant_client.models import TurboQuantization

        turbo_cfg = QuantizationConfig(type=QuantizationType.TURBO)
        hybrid_stmt = CreateCollectionStmt(
            collection="articles",
            hybrid=True,
            quantization=turbo_cfg,
        )
        executor.execute(hybrid_stmt)
        create_kwargs = mock_client.create_collection.call_args.kwargs
        assert isinstance(create_kwargs.get("quantization_config"), TurboQuantization)
        assert "sparse_vectors_config" in create_kwargs

    def test_turbo_result_message_includes_turbo(self, executor, mock_client):
        """The execution result message mentions "turbo"."""
        outcome = executor.execute(self._turbo_stmt())
        assert "turbo" in outcome.message

    def test_turbo_invalid_bits_at_executor_raises(self, executor, mock_client):
        """A ``turbo_bits`` value that slipped past parser validation must raise
        QQLRuntimeError explicitly rather than being silently coerced to BITS4."""
        from qql.exceptions import QQLRuntimeError as QQLErr

        bad_cfg = QuantizationConfig(type=QuantizationType.TURBO, turbo_bits=3.0)
        with pytest.raises(QQLErr, match="Unsupported TURBO bit depth"):
            executor._build_quantization_config(bad_cfg)
|
|
@@ -1031,3 +1031,83 @@ class TestQuantizeCreate:
|
|
|
1031
1031
|
def test_scalar_quantile_integer_above_one_raises(self):
    """An integer QUANTILE of 2 is rejected with ``QQLSyntaxError``."""
    statement = "CREATE COLLECTION articles QUANTIZE SCALAR QUANTILE 2"
    with pytest.raises(QQLSyntaxError):
        parse(statement)
|
|
1034
|
+
|
|
1035
|
+
|
|
1036
|
+
class TestTurboQuantCreate:
    """Parser tests for QUANTIZE TURBO [BITS n] [ALWAYS RAM]."""

    # ── Default / no options ──────────────────────────────────────────────

    def test_turbo_no_options(self):
        """Bare QUANTIZE TURBO: TURBO type, no bits, ALWAYS RAM off."""
        quant = parse("CREATE COLLECTION articles QUANTIZE TURBO").quantization
        assert quant is not None
        assert quant.type == QuantizationType.TURBO
        assert quant.turbo_bits is None
        assert quant.always_ram is False

    # ── BITS variants ─────────────────────────────────────────────────────

    def test_turbo_bits4(self):
        """BITS 4 parses to ``turbo_bits == 4.0``."""
        quant = parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 4").quantization
        assert quant.type == QuantizationType.TURBO
        assert quant.turbo_bits == 4.0

    def test_turbo_bits2(self):
        """BITS 2 parses to ``turbo_bits == 2.0``."""
        quant = parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 2").quantization
        assert quant.turbo_bits == 2.0

    def test_turbo_bits1_5(self):
        """BITS 1.5 parses to ``turbo_bits == 1.5``."""
        quant = parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 1.5").quantization
        assert quant.turbo_bits == 1.5

    def test_turbo_bits1(self):
        """BITS 1 parses to ``turbo_bits == 1.0``."""
        quant = parse("CREATE COLLECTION articles QUANTIZE TURBO BITS 1").quantization
        assert quant.turbo_bits == 1.0

    # ── ALWAYS RAM ────────────────────────────────────────────────────────

    def test_turbo_always_ram_no_bits(self):
        """ALWAYS RAM without BITS sets the flag and leaves bits unset."""
        quant = parse("CREATE COLLECTION articles QUANTIZE TURBO ALWAYS RAM").quantization
        assert quant.type == QuantizationType.TURBO
        assert quant.always_ram is True
        assert quant.turbo_bits is None

    def test_turbo_bits_and_always_ram(self):
        """BITS and ALWAYS RAM can be combined in one clause."""
        quant = parse(
            "CREATE COLLECTION articles QUANTIZE TURBO BITS 2 ALWAYS RAM"
        ).quantization
        assert quant.turbo_bits == 2.0
        assert quant.always_ram is True

    # ── Composed with other clauses ───────────────────────────────────────

    def test_turbo_with_hybrid_shorthand(self):
        """The HYBRID shorthand composes with QUANTIZE TURBO."""
        stmt = parse("CREATE COLLECTION articles HYBRID QUANTIZE TURBO")
        assert stmt.hybrid is True
        assert stmt.quantization.type == QuantizationType.TURBO

    def test_turbo_with_using_hybrid(self):
        """USING HYBRID composes with QUANTIZE TURBO BITS."""
        stmt = parse("CREATE COLLECTION articles USING HYBRID QUANTIZE TURBO BITS 2")
        assert stmt.hybrid is True
        assert stmt.quantization.turbo_bits == 2.0

    def test_turbo_with_model(self):
        """USING MODEL composes with QUANTIZE TURBO BITS."""
        stmt = parse(
            "CREATE COLLECTION articles USING MODEL 'BAAI/bge-base-en-v1.5' QUANTIZE TURBO BITS 1.5"
        )
        assert stmt.model == "BAAI/bge-base-en-v1.5"
        assert stmt.quantization.type == QuantizationType.TURBO
        assert stmt.quantization.turbo_bits == 1.5

    def test_turbo_with_hybrid_dense_model(self):
        """USING HYBRID DENSE MODEL composes with BITS and ALWAYS RAM."""
        stmt = parse(
            "CREATE COLLECTION articles USING HYBRID DENSE MODEL 'x' QUANTIZE TURBO BITS 1 ALWAYS RAM"
        )
        assert stmt.hybrid is True
        assert stmt.model == "x"
        assert stmt.quantization.turbo_bits == 1.0
        assert stmt.quantization.always_ram is True

    # ── Error cases ───────────────────────────────────────────────────────

    def test_turbo_invalid_bits_raises(self):
        """BITS 3 is rejected with ``QQLSyntaxError``."""
        statement = "CREATE COLLECTION articles QUANTIZE TURBO BITS 3"
        with pytest.raises(QQLSyntaxError):
            parse(statement)

    def test_turbo_invalid_bits_float_raises(self):
        """BITS 0.5 is rejected with ``QQLSyntaxError``."""
        statement = "CREATE COLLECTION articles QUANTIZE TURBO BITS 0.5"
        with pytest.raises(QQLSyntaxError):
            parse(statement)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|