qql-cli 1.2.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {qql_cli-1.2.0 → qql_cli-1.4.0}/PKG-INFO +281 -15
  2. {qql_cli-1.2.0 → qql_cli-1.4.0}/README.md +280 -14
  3. {qql_cli-1.2.0 → qql_cli-1.4.0}/pyproject.toml +1 -1
  4. qql_cli-1.4.0/resources/Features.md +480 -0
  5. qql_cli-1.4.0/resources/sample.qql +537 -0
  6. qql_cli-1.4.0/resources/sample_v2.qql +124 -0
  7. {qql_cli-1.2.0 → qql_cli-1.4.0}/src/qql/ast_nodes.py +17 -0
  8. {qql_cli-1.2.0 → qql_cli-1.4.0}/src/qql/cli.py +180 -3
  9. qql_cli-1.4.0/src/qql/dumper.py +212 -0
  10. {qql_cli-1.2.0 → qql_cli-1.4.0}/src/qql/executor.py +185 -17
  11. {qql_cli-1.2.0 → qql_cli-1.4.0}/src/qql/lexer.py +20 -0
  12. {qql_cli-1.2.0 → qql_cli-1.4.0}/src/qql/parser.py +126 -6
  13. qql_cli-1.4.0/src/qql/script.py +158 -0
  14. qql_cli-1.4.0/tests/test_dumper.py +232 -0
  15. {qql_cli-1.2.0 → qql_cli-1.4.0}/tests/test_executor.py +271 -0
  16. {qql_cli-1.2.0 → qql_cli-1.4.0}/tests/test_parser.py +107 -0
  17. qql_cli-1.4.0/tests/test_script.py +205 -0
  18. {qql_cli-1.2.0 → qql_cli-1.4.0}/.github/workflows/ci.yml +0 -0
  19. {qql_cli-1.2.0 → qql_cli-1.4.0}/.github/workflows/publish.yml +0 -0
  20. {qql_cli-1.2.0 → qql_cli-1.4.0}/.gitignore +0 -0
  21. {qql_cli-1.2.0 → qql_cli-1.4.0}/LICENSE +0 -0
  22. {qql_cli-1.2.0 → qql_cli-1.4.0}/main.py +0 -0
  23. {qql_cli-1.2.0 → qql_cli-1.4.0}/src/qql/__init__.py +0 -0
  24. {qql_cli-1.2.0 → qql_cli-1.4.0}/src/qql/config.py +0 -0
  25. {qql_cli-1.2.0 → qql_cli-1.4.0}/src/qql/embedder.py +0 -0
  26. {qql_cli-1.2.0 → qql_cli-1.4.0}/src/qql/exceptions.py +0 -0
  27. {qql_cli-1.2.0 → qql_cli-1.4.0}/tests/__init__.py +0 -0
  28. {qql_cli-1.2.0 → qql_cli-1.4.0}/tests/test_lexer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: qql-cli
3
- Version: 1.2.0
3
+ Version: 1.4.0
4
4
  Summary: A SQL-like query language CLI wrapper for Qdrant vector database
5
5
  Project-URL: Homepage, https://github.com/pavanjava/qql
6
6
  Project-URL: Repository, https://github.com/pavanjava/qql
@@ -83,7 +83,9 @@ qql> SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 USING HYBRID RERANK
83
83
  - [The QQL Shell](#the-qql-shell)
84
84
  - [All QQL Operations](#all-qql-operations)
85
85
  - [INSERT — add a point](#insert--add-a-point)
86
+ - [INSERT BULK — batch insert](#insert-bulk--batch-insert-multiple-points)
86
87
  - [SEARCH — find similar points](#search--find-similar-points)
88
+ - [RECOMMEND — retrieve by example IDs](#recommend--retrieve-by-example-ids)
87
89
  - [Query-Time Search Params (`EXACT`, `WITH`)](#query-time-search-params-exact-with)
88
90
  - [WHERE Clause Filters](#where-clause-filters)
89
91
  - [Hybrid Search (USING HYBRID)](#hybrid-search-using-hybrid)
@@ -92,6 +94,9 @@ qql> SEARCH notes SIMILAR TO 'vector databases' LIMIT 5 USING HYBRID RERANK
92
94
  - [CREATE COLLECTION — create a collection](#create-collection--create-a-collection)
93
95
  - [DROP COLLECTION — delete a collection](#drop-collection--delete-a-collection)
94
96
  - [DELETE — remove a point](#delete--remove-a-point)
97
+ - [Script Files](#script-files)
98
+ - [EXECUTE — run a script file](#execute--run-a-qql-script-file)
99
+ - [DUMP COLLECTION — export to script](#dump-collection--export-collection-to-a-qql-script-file)
95
100
  - [Embedding Models](#embedding-models)
96
101
  - [Value Types in Dictionaries](#value-types-in-dictionaries)
97
102
  - [Configuration File](#configuration-file)
@@ -230,6 +235,8 @@ Inserts a new document into a collection. The `text` field is **mandatory** —
230
235
 
231
236
  If the collection does not exist yet, it is **created automatically** with the correct vector dimensions.
232
237
 
238
+ If you include an `id` field in `VALUES`, QQL uses it as the Qdrant point ID. Supported explicit IDs are unsigned integers or UUID strings. If you omit `id`, QQL generates a UUID automatically.
239
+
233
240
  **Syntax:**
234
241
  ```
235
242
  INSERT INTO COLLECTION <collection_name> VALUES {<dict>}
@@ -248,6 +255,7 @@ INSERT INTO COLLECTION articles VALUES {'text': 'Qdrant supports cosine similari
248
255
  Insert with metadata:
249
256
  ```sql
250
257
  INSERT INTO COLLECTION articles VALUES {
258
+ 'id': 1001,
251
259
  'text': 'Neural networks learn representations from data',
252
260
  'author': 'alice',
253
261
  'category': 'ml',
@@ -275,13 +283,13 @@ INSERT INTO COLLECTION articles VALUES {'text': 'hello world'}
275
283
  **What happens internally:**
276
284
  1. The `text` value is embedded into a dense vector using the configured model.
277
285
  2. In hybrid mode, a sparse BM25 vector is also generated.
278
- 3. A UUID is auto-generated as the point ID.
279
- 4. All fields (including `text`) are stored in the payload.
286
+ 3. If `id` is provided, it is used as the point ID; otherwise a UUID is auto-generated.
287
+ 4. All fields except `id` are stored in the payload.
280
288
  5. The point is upserted into Qdrant.
281
289
 
282
290
  **Rules:**
283
291
  - `text` is always required. Omitting it raises an error.
284
- - A point ID (UUID) is generated automatically you do not provide one.
292
+ - `id`, when provided, must be an unsigned integer or UUID string.
285
293
  - If the collection already exists with a different vector size (from a different model), an error is raised with a clear message.
286
294
  - Hybrid inserts require a hybrid collection (created with `CREATE COLLECTION ... HYBRID` or auto-created on first `USING HYBRID` insert).
287
295
 
@@ -293,6 +301,8 @@ Inserts multiple documents in a single statement. Each item in the array must co
293
301
 
294
302
  If the collection does not exist yet, it is **created automatically** on the first bulk insert.
295
303
 
304
+ Each record may optionally include an `id` field. This is the preferred way to keep seed data deterministic and to make follow-up operations like `RECOMMEND` or `DELETE` reproducible.
305
+
296
306
  **Syntax:**
297
307
  ```
298
308
  INSERT BULK INTO COLLECTION <collection_name> VALUES [<dict>, <dict>, ...]
@@ -315,9 +325,9 @@ INSERT BULK INTO COLLECTION articles VALUES [
315
325
  Bulk insert with metadata:
316
326
  ```sql
317
327
  INSERT BULK INTO COLLECTION articles VALUES [
318
- {'text': 'Attention is all you need', 'author': 'vaswani', 'year': 2017},
319
- {'text': 'BERT: Pre-training of deep bidirectional transformers', 'author': 'devlin', 'year': 2018},
320
- {'text': 'Language models are few-shot learners', 'author': 'brown', 'year': 2020}
328
+ {'id': 1001, 'text': 'Attention is all you need', 'author': 'vaswani', 'year': 2017},
329
+ {'id': 1002, 'text': 'BERT: Pre-training of deep bidirectional transformers', 'author': 'devlin', 'year': 2018},
330
+ {'id': 1003, 'text': 'Language models are few-shot learners', 'author': 'brown', 'year': 2020}
321
331
  ]
322
332
  ```
323
333
 
@@ -332,7 +342,7 @@ INSERT BULK INTO COLLECTION articles VALUES [
332
342
  **Rules:**
333
343
  - Every dict in the array must contain a `"text"` key. Missing `text` on any item raises an error with the offending index.
334
344
  - An empty array `[]` raises an error.
335
- - A UUID is auto-generated for each point you do not provide IDs.
345
+ - `id`, when provided, must be an unsigned integer or UUID string.
336
346
  - Supports all the same `USING` clauses as single `INSERT`.
337
347
 
338
348
  ---
@@ -415,13 +425,111 @@ Results are displayed as a table with three columns:
415
425
  ```
416
426
 
417
427
  - **Score** — similarity score. Higher is more relevant.
418
- - **ID** — the UUID of the matching point.
428
+ - **ID** — the point ID returned by Qdrant. This may be an integer or a UUID string.
419
429
  - **Payload** — all fields stored alongside the vector.
420
430
 
421
431
  **Important:** Use the same model for SEARCH as you used for INSERT. Mixing models produces meaningless scores because the vectors live in different spaces.
422
432
 
423
433
  ---
424
434
 
435
+ ### RECOMMEND — retrieve by example IDs
436
+
437
+ Performs a Qdrant recommendation query using existing point IDs as positive and optional negative examples.
438
+
439
+ This is useful when you already know which stored points represent the kind of result you want. Qdrant uses those examples to retrieve nearby points, and QQL automatically excludes the seed IDs from the results.
440
+
441
+ **Syntax:**
442
+ ```sql
443
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n>
444
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) NEGATIVE IDS (<id>, ...) LIMIT <n>
445
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) STRATEGY '<strategy>' LIMIT <n>
446
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> WHERE <filter>
447
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> OFFSET <n>
448
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> SCORE THRESHOLD <f>
449
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LIMIT <n> WITH { exact: true, hnsw_ef: <n> }
450
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LOOKUP FROM <collection> LIMIT <n>
451
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) LOOKUP FROM <collection> VECTOR '<name>' LIMIT <n>
452
+ RECOMMEND FROM <collection_name> POSITIVE IDS (<id>, ...) USING '<vector_name>' LIMIT <n>
453
+ ```
454
+
455
+ **Examples:**
456
+
457
+ Recommend more results like two known articles:
458
+ ```sql
459
+ RECOMMEND FROM articles POSITIVE IDS (1001, 1002) LIMIT 5
460
+ ```
461
+
462
+ Recommend similar results while steering away from one bad example:
463
+ ```sql
464
+ RECOMMEND FROM articles POSITIVE IDS (1001, 1002) NEGATIVE IDS (1009) LIMIT 5
465
+ ```
466
+
467
+ Use Qdrant's `best_score` recommendation strategy:
468
+ ```sql
469
+ RECOMMEND FROM articles POSITIVE IDS (1001) STRATEGY 'best_score' LIMIT 10
470
+ ```
471
+
472
+ Recommend only within a filtered subset:
473
+ ```sql
474
+ RECOMMEND FROM articles POSITIVE IDS (1001) LIMIT 5 WHERE year >= 2020 AND status = 'published'
475
+ ```
476
+
477
+ Paginate recommendations (skip first 5, return next 10):
478
+ ```sql
479
+ RECOMMEND FROM articles POSITIVE IDS (1001) LIMIT 10 OFFSET 5
480
+ ```
481
+
482
+ Filter out low-confidence recommendations:
483
+ ```sql
484
+ RECOMMEND FROM articles POSITIVE IDS (1001) LIMIT 10 SCORE THRESHOLD 0.5
485
+ ```
486
+
487
+ Exact KNN baseline for recommendations:
488
+ ```sql
489
+ RECOMMEND FROM articles POSITIVE IDS (1001) LIMIT 5 WITH { exact: true }
490
+ ```
491
+
492
+ Cross-collection recommend (look up example IDs from another collection):
493
+ ```sql
494
+ RECOMMEND FROM target_collection
495
+ POSITIVE IDS (1001)
496
+ LOOKUP FROM source_collection VECTOR 'dense'
497
+ LIMIT 5
498
+ ```
499
+
500
+ Recommend using a specific named vector in the target collection:
501
+ ```sql
502
+ RECOMMEND FROM articles
503
+ POSITIVE IDS (1001)
504
+ USING 'sparse'
505
+ LIMIT 5
506
+ ```
507
+
508
+ Full-featured recommend:
509
+ ```sql
510
+ RECOMMEND FROM articles
511
+ POSITIVE IDS (1001, 1002)
512
+ NEGATIVE IDS (1009)
513
+ STRATEGY 'best_score'
514
+ LOOKUP FROM other_collection VECTOR 'dense'
515
+ USING 'dense'
516
+ LIMIT 10
517
+ OFFSET 5
518
+ SCORE THRESHOLD 0.5
519
+ WHERE year >= 2020
520
+ WITH { exact: true }
521
+ ```
522
+
523
+ **Supported strategies:**
524
+
525
+ - `average_vector`
526
+ - `best_score`
527
+ - `sum_scores`
528
+
529
+ **Clause order:** `POSITIVE IDS` → `NEGATIVE IDS` → `STRATEGY` → `LOOKUP FROM` → `USING` → `LIMIT` → `OFFSET` → `SCORE THRESHOLD` → `WHERE` → `WITH`
530
+
531
+ ---
532
+
425
533
  ### Query-Time Search Params (`EXACT`, `WITH`)
426
534
 
427
535
  QQL supports a small set of Qdrant query-time search parameters on `SEARCH` statements.
@@ -862,7 +970,7 @@ Raises an error if the collection does not exist.
862
970
 
863
971
  ### DELETE — remove a point
864
972
 
865
- Deletes a single point from a collection by its ID. The point ID is the UUID returned by INSERT.
973
+ Deletes a single point from a collection by its ID. The ID may be an integer or a UUID string, either generated by QQL or supplied explicitly on INSERT.
866
974
 
867
975
  **Syntax:**
868
976
  ```
@@ -886,6 +994,163 @@ To find a point's ID, run a SEARCH first and copy the ID from the results table.
886
994
 
887
995
  ---
888
996
 
997
+ ## Script Files
998
+
999
+ QQL supports reading from and writing to `.qql` script files, making it easy to automate bulk operations, seed databases, and back up collections.
1000
+
1001
+ ---
1002
+
1003
+ ### EXECUTE — run a .qql script file
1004
+
1005
+ Execute a file containing multiple QQL statements in sequence. Each statement is parsed and executed in order. `--` comments are stripped before parsing.
1006
+
1007
+ **CLI usage:**
1008
+ ```bash
1009
+ qql execute /path/to/script.qql
1010
+
1011
+ # Stop on first error instead of continuing through all statements
1012
+ qql execute /path/to/script.qql --stop-on-error
1013
+ ```
1014
+
1015
+ **In-shell usage (inside the QQL REPL):**
1016
+ ```
1017
+ qql> EXECUTE /path/to/script.qql
1018
+ qql> \e /path/to/script.qql
1019
+ ```
1020
+
1021
+ **Script format:**
1022
+
1023
+ ```sql
1024
+ -- This is a comment — the entire line is ignored
1025
+ -- ============================================================
1026
+ -- QQL Script — populate articles collection
1027
+ -- ============================================================
1028
+
1029
+ -- Step 1: create the collection
1030
+ CREATE COLLECTION articles
1031
+
1032
+ -- Step 2: bulk insert records
1033
+ INSERT BULK INTO COLLECTION articles VALUES [
1034
+ {'text': 'Neural networks learn representations', 'year': 2023},
1035
+ {'text': 'Attention mechanisms in transformers', 'year': 2024}
1036
+ ]
1037
+
1038
+ -- Step 3: verify
1039
+ SHOW COLLECTIONS
1040
+ ```
1041
+
1042
+ **Rules:**
1043
+ - `--` to end-of-line is a comment and is ignored (inline or full-line)
1044
+ - Statements can span multiple lines (e.g. `INSERT BULK ... VALUES [...]`)
1045
+ - `RECOMMEND` statements work in `.qql` files the same way they do in the REPL
1046
+ - Blank lines between statements are ignored
1047
+ - By default all statements run even if one fails; use `--stop-on-error` to halt early
1048
+
1049
+ **Included examples:**
1050
+ - [`resources/sample.qql`](resources/sample.qql) seeds the demo medical dataset
1051
+ - [`resources/sample_v2.qql`](resources/sample_v2.qql) is a compact end-to-end example with explicit IDs and runnable `RECOMMEND` statements
1052
+
1053
+ **Example output:**
1054
+ ```
1055
+ Executing: /path/to/script.qql
1056
+
1057
+ [1/3] CREATE COLLECTION articles
1058
+ ✓ Collection 'articles' created (384-dimensional vectors, cosine distance)
1059
+ [2/3] INSERT BULK INTO COLLECTION articles VALUES [ …
1060
+ ✓ Inserted 2 points
1061
+ [3/3] SHOW COLLECTIONS
1062
+ ✓ 1 collection(s) found
1063
+
1064
+ Done. 3/3 statement(s) succeeded.
1065
+ ```
1066
+
1067
+ ---
1068
+
1069
+ ### DUMP COLLECTION — export collection to a .qql script file
1070
+
1071
+ Export every point in a collection to a `.qql` script file. The generated file is valid QQL — it can be re-imported with `qql execute` to restore or migrate the collection. Points are written in batches of 50 as `INSERT BULK` statements.
1072
+
1073
+ **CLI usage:**
1074
+ ```bash
1075
+ qql dump <collection_name> <output.qql>
1076
+ ```
1077
+
1078
+ **In-shell usage (inside the QQL REPL):**
1079
+ ```
1080
+ qql> DUMP COLLECTION <name> <output.qql>
1081
+ ```
1082
+
1083
+ **Example:**
1084
+ ```bash
1085
+ qql dump medical_records /tmp/medical_records.qql
1086
+ ```
1087
+
1088
+ ```
1089
+ Dumping: 'medical_records' → /tmp/medical_records.qql
1090
+
1091
+ Collection type : hybrid (dense + sparse)
1092
+ Points : 41
1093
+ Batches : 1 (50 points/batch)
1094
+
1095
+ [1/1] wrote 41 point(s)
1096
+
1097
+ Done. 41 point(s) written.
1098
+ ```
1099
+
1100
+ **Generated file structure:**
1101
+ ```sql
1102
+ -- ============================================================
1103
+ -- QQL Dump — collection: medical_records
1104
+ -- Generated : 2026-04-19 14:32:11
1105
+ -- Points : 41
1106
+ -- Type : hybrid (dense + sparse)
1107
+ -- Note : Re-importing re-embeds all text using the
1108
+ -- configured model (see: qql connect).
1109
+ -- ============================================================
1110
+
1111
+ CREATE COLLECTION medical_records HYBRID
1112
+
1113
+ -- Batch 1 / 1 (records 1–41)
1114
+ INSERT BULK INTO COLLECTION medical_records VALUES [
1115
+ {
1116
+ 'text': 'Alzheimers disease is characterized by...',
1117
+ 'title': 'Alzheimers Disease Overview',
1118
+ 'department': 'neurology',
1119
+ 'year': 2023,
1120
+ 'peer_reviewed': true
1121
+ },
1122
+ ...
1123
+ ] USING HYBRID
1124
+
1125
+ -- ============================================================
1126
+ -- End of dump
1127
+ -- Written : 41
1128
+ -- Skipped : 0 (no 'text' field)
1129
+ -- ============================================================
1130
+ ```
1131
+
1132
+ **Round-trip workflow — backup and restore:**
1133
+ ```bash
1134
+ # 1. Dump the collection
1135
+ qql dump medical_records backup.qql
1136
+
1137
+ # 2. Drop it (run this inside the QQL shell, not bash)
1138
+ qql> DROP COLLECTION medical_records
1139
+
1140
+ # 3. Restore from the dump
1141
+ qql execute backup.qql
1142
+ ```
1143
+
1144
+ **Rules and notes:**
1145
+ - Points without a `'text'` payload field are **skipped** (counted in the footer comment).
1146
+ - Hybrid collections produce `CREATE COLLECTION <name> HYBRID` and `INSERT BULK ... USING HYBRID` statements.
1147
+ - Dense collections produce plain `CREATE COLLECTION <name>` and `INSERT BULK` statements.
1148
+ - All payload value types are preserved: strings, integers, floats, booleans (`true`/`false`), `null`, lists, and nested dicts.
1149
+ - Re-importing re-embeds all text using your currently configured model — use the same model as the original collection to preserve semantic accuracy.
1150
+ - Parent directories of the output path are created automatically.
1151
+
1152
+ ---
1153
+
889
1154
  ## Embedding Models
890
1155
 
891
1156
  QQL uses [Fastembed](https://github.com/qdrant/fastembed) to convert text into vectors locally — no external API call is needed.
@@ -1057,15 +1322,15 @@ result = run_query(
1057
1322
  "INSERT INTO COLLECTION notes VALUES {'text': 'hello world', 'author': 'alice', 'year': 2024}",
1058
1323
  url="http://localhost:6333",
1059
1324
  )
1060
- print(result.message) # "Inserted 1 point [<uuid>]"
1061
- print(result.data) # {"id": "...", "collection": "notes"}
1325
+ print(result.message) # "Inserted 1 point [<id>]"
1326
+ print(result.data) # {"id": 1001 or "<uuid>", "collection": "notes"}
1062
1327
 
1063
1328
  # Insert with hybrid vectors
1064
1329
  result = run_query(
1065
1330
  "INSERT INTO COLLECTION notes VALUES {'text': 'hello world'} USING HYBRID",
1066
1331
  url="http://localhost:6333",
1067
1332
  )
1068
- print(result.message) # "Inserted 1 point [<uuid>] (hybrid)"
1333
+ print(result.message) # "Inserted 1 point [<id>] (hybrid)"
1069
1334
 
1070
1335
  # Dense search with WHERE filter
1071
1336
  result = run_query(
@@ -1120,9 +1385,10 @@ class ExecutionResult:
1120
1385
 
1121
1386
  | Operation | `result.data` type |
1122
1387
  |---|---|
1123
- | INSERT (dense) | `{"id": "<uuid>", "collection": "<name>"}` |
1124
- | INSERT (hybrid) | `{"id": "<uuid>", "collection": "<name>"}` |
1388
+ | INSERT (dense) | `{"id": int \| "<uuid>", "collection": "<name>"}` |
1389
+ | INSERT (hybrid) | `{"id": int \| "<uuid>", "collection": "<name>"}` |
1125
1390
  | SEARCH | `[{"id": str, "score": float, "payload": dict}, ...]` |
1391
+ | RECOMMEND | `[{"id": str, "score": float, "payload": dict}, ...]` |
1126
1392
  | SHOW COLLECTIONS | `["name1", "name2", ...]` |
1127
1393
  | CREATE COLLECTION | `None` |
1128
1394
  | DROP COLLECTION | `None` |