python-nlql 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_nlql-0.2.0 → python_nlql-0.3.0}/PKG-INFO +34 -32
- python_nlql-0.3.0/README.md +84 -0
- python_nlql-0.2.0/README.md → python_nlql-0.3.0/README.zh-CN.md +3 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/benchmarks/bench.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/index-cache.md +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/ingestion.md +5 -5
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/registry.md +3 -3
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/tutorials/document-loading.md +2 -2
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/tutorials/hybrid-stores.md +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/tutorials/llm-function-calling.md +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/tutorials/query-builder.md +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/tutorials/quickstart.md +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/tutorials/reranking.md +6 -6
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/index-cache.md +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/ingestion.md +5 -5
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/registry.md +3 -3
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/tutorials/document-loading.md +2 -2
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/tutorials/hybrid-stores.md +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/tutorials/llm-function-calling.md +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/tutorials/query-builder.md +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/tutorials/quickstart.md +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/tutorials/reranking.md +6 -6
- python_nlql-0.3.0/eval/README.md +59 -0
- python_nlql-0.3.0/eval/data/__init__.py +0 -0
- python_nlql-0.3.0/eval/data/datasets.py +194 -0
- python_nlql-0.3.0/eval/judge.py +36 -0
- python_nlql-0.3.0/eval/llm.py +49 -0
- python_nlql-0.3.0/eval/pipelines/__init__.py +0 -0
- python_nlql-0.3.0/eval/pipelines/base.py +19 -0
- python_nlql-0.3.0/eval/pipelines/langchain_rag.py +43 -0
- python_nlql-0.3.0/eval/pipelines/nlql_rag.py +30 -0
- python_nlql-0.3.0/eval/report.md +234 -0
- python_nlql-0.3.0/eval/run.py +211 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/examples/document_loading.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/examples/hybrid_stores.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/examples/llm_function_calling.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/examples/multivector.py +3 -2
- {python_nlql-0.2.0 → python_nlql-0.3.0}/examples/openai_semantic.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/examples/quickstart.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/examples/reranking.py +4 -3
- {python_nlql-0.2.0 → python_nlql-0.3.0}/pyproject.toml +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/__init__.py +10 -16
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/sdk/CLAUDE.md +1 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/sdk/engine.py +2 -1
- python_nlql-0.3.0/tests/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_cross_store.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_expressions.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_loaders.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_multivector.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_pgvector_store.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_rerank.py +3 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_sdk.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_types.py +2 -1
- {python_nlql-0.2.0 → python_nlql-0.3.0}/.claude/index.json +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/.claude/launch.json +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/.github/workflows/docs.yml +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/.github/workflows/publish.yml +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/.gitignore +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/DESIGN.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/LICENSE +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/benchmarks/README.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/about/license.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/architecture.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/data-model.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/evaluation.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/hybrid-pushdown.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/multimodal.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/overview.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/store-protocol.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/syntax.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/three-entries.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/concepts/type-system.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/content/tutorials/multimodal-search.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/index.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/performance.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/embed.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/errors.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/exec.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/index.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/ingest.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/ir.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/lang.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/loaders.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/model.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/plan.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/registry.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/rerank.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/sdk.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/store.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/en/reference/types.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/hooks.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/about/license.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/architecture.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/data-model.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/evaluation.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/hybrid-pushdown.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/multimodal.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/overview.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/store-protocol.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/syntax.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/three-entries.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/concepts/type-system.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/content/tutorials/multimodal-search.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/index.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/performance.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/embed.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/errors.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/exec.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/index.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/ingest.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/ir.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/lang.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/loaders.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/model.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/plan.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/registry.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/rerank.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/sdk.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/store.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/docs/zh/reference/types.md +0 -0
- {python_nlql-0.2.0/tests → python_nlql-0.3.0/eval}/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/examples/doubao_vision_search.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/examples/multimodal_search.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/mkdocs.yml +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/requirements-docs.txt +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/base.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/cache.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/clip.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/doubao.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/fake.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/multimodal.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/openai.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/embed/sentence_transformers.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/errors.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/errors.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/exec/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/exec/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/exec/evaluate.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/exec/executor.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ingest/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ingest/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ingest/language.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ingest/normalize.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ingest/pipeline.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ingest/splitters.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ir/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ir/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ir/nodes.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/ir/schema.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/lang/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/lang/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/lang/grammar.lark +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/lang/parser.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/lang/transformer.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/loaders/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/loaders/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/loaders/base.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/loaders/docx_loader.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/loaders/pdf_loader.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/loaders/text.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/model/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/model/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/model/document.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/model/payload.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/model/unit.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/model/vector.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/plan/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/plan/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/plan/plan.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/plan/planner.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/plan/pushdown.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/registry/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/registry/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/registry/builtins.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/registry/core.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/rerank/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/rerank/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/rerank/base.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/rerank/cross_encoder.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/sdk/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/sdk/builder.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/base.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/chroma_store.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/columns.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/common.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/faiss_store.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/filter.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/hnsw_store.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/local.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/pgvector_store.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/store/qdrant_store.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/types/CLAUDE.md +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/types/__init__.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/types/coerce.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/src/nlql/types/core.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/conftest.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_builder.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_chroma_store.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_columns.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_doubao.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_embed.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_exec.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_faiss_store.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_hnsw_store.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_ingest.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_ir.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_lang.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_language.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_model.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_multimodal.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_pushdown.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_qdrant_store.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_registry.py +0 -0
- {python_nlql-0.2.0 → python_nlql-0.3.0}/tests/test_store.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-nlql
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: SQL-style semantic query language and retrieval middleware for Agents & RAG
|
|
5
5
|
Project-URL: Repository, https://github.com/natural-language-query-language/python-nlql
|
|
6
6
|
Author: Okysu
|
|
@@ -57,16 +57,18 @@ Description-Content-Type: text/markdown
|
|
|
57
57
|
[](LICENSE)
|
|
58
58
|
[](https://natural-language-query-language.github.io/python-nlql/)
|
|
59
59
|
|
|
60
|
-
|
|
60
|
+
**English** · [简体中文](README.zh-CN.md) · [在线文档](https://natural-language-query-language.github.io/python-nlql/)
|
|
61
61
|
|
|
62
|
-
|
|
62
|
+
NLQL lets you do semantic search with SQL-style statements. Relevance scoring, filtering, and sorting live in one query — no more scattered embedding calls and post-processing code.
|
|
63
63
|
|
|
64
|
-
|
|
64
|
+
Built for Agent and RAG applications: the query itself is structured data, usable directly as an LLM tool-call payload.
|
|
65
|
+
|
|
66
|
+
## What it looks like
|
|
65
67
|
|
|
66
68
|
```python
|
|
67
69
|
import nlql
|
|
68
70
|
|
|
69
|
-
engine = nlql.Engine(nlql.FakeEmbedder())  # 或 OpenAIEmbedder,以及任意 Embedder 实现
|
|
71
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder()) # or OpenAIEmbedder, or any Embedder
|
|
70
72
|
engine.add_text("AI agents plan tasks and call tools.", metadata={"status": "published"})
|
|
71
73
|
engine.add_text("Banana bread needs flour and sugar.", metadata={"status": "draft"})
|
|
72
74
|
|
|
@@ -80,54 +82,54 @@ for unit in engine.search('''
|
|
|
80
82
|
print(f"{unit.scores['rel']:+.3f} {unit.content}")
|
|
81
83
|
```
|
|
82
84
|
|
|
83
|
-
|
|
85
|
+
The statement reads almost like SQL: `SELECT` sets the return granularity, `LET` computes relevance, `WHERE` filters, `ORDER BY` / `LIMIT` sort and cap.
|
|
84
86
|
|
|
85
|
-
##
|
|
87
|
+
## Features
|
|
86
88
|
|
|
87
|
-
-
|
|
88
|
-
-
|
|
89
|
-
-
|
|
90
|
-
-
|
|
91
|
-
-
|
|
92
|
-
-
|
|
89
|
+
- **One statement, full intent** — relevance, filtering, and sorting in one place, not scattered across business code
|
|
90
|
+
- **Three ways to write, identical results** — SQL statement, Python chained builder, or JSON IR; all compile to the same internal representation
|
|
91
|
+
- **Pluggable backends** — built-in store works out of the box; switch to Qdrant / Faiss / Chroma / HnswLib / pgvector with one line
|
|
92
|
+
- **Two-stage retrieval** — attach a reranker after recall for higher accuracy
|
|
93
|
+
- **Multimodal** — text and images share one vector space; retrieve images with text
|
|
94
|
+
- **Explainable** — `engine.explain()` prints the query plan
|
|
93
95
|
|
|
94
|
-
##
|
|
96
|
+
## Installation
|
|
95
97
|
|
|
96
98
|
```bash
|
|
97
99
|
pip install python-nlql
|
|
98
100
|
```
|
|
99
101
|
|
|
100
|
-
|
|
102
|
+
Optional extras:
|
|
101
103
|
|
|
102
|
-
|
|
|
104
|
+
| Command | Purpose |
|
|
103
105
|
|---|---|
|
|
104
|
-
| `pip install "python-nlql[faiss]"` | Faiss
|
|
105
|
-
| `pip install "python-nlql[hnsw]"` | HnswLib
|
|
106
|
-
| `pip install "python-nlql[qdrant]"` | Qdrant
|
|
107
|
-
| `pip install "python-nlql[chroma]"` | Chroma
|
|
108
|
-
| `pip install "python-nlql[pgvector]"` | Postgres + pgvector
|
|
109
|
-
| `pip install "python-nlql[local]"` |
|
|
110
|
-
| `pip install "python-nlql[loaders]"` |
|
|
106
|
+
| `pip install "python-nlql[faiss]"` | Faiss backend |
|
|
107
|
+
| `pip install "python-nlql[hnsw]"` | HnswLib backend (for large-scale data) |
|
|
108
|
+
| `pip install "python-nlql[qdrant]"` | Qdrant backend |
|
|
109
|
+
| `pip install "python-nlql[chroma]"` | Chroma backend |
|
|
110
|
+
| `pip install "python-nlql[pgvector]"` | Postgres + pgvector backend |
|
|
111
|
+
| `pip install "python-nlql[local]"` | local sentence-transformers / CLIP / cross-encoder |
|
|
112
|
+
| `pip install "python-nlql[loaders]"` | DOCX / PDF file loaders |
|
|
111
113
|
|
|
112
|
-
##
|
|
114
|
+
## Switching backends
|
|
113
115
|
|
|
114
|
-
|
|
116
|
+
One line; ingestion and query code stay the same:
|
|
115
117
|
|
|
116
118
|
```python
|
|
117
119
|
from nlql.store.qdrant_store import QdrantStore
|
|
118
120
|
engine = nlql.Engine(embedder, store=QdrantStore(location=":memory:"))
|
|
119
121
|
```
|
|
120
122
|
|
|
121
|
-
##
|
|
123
|
+
## Documentation
|
|
122
124
|
|
|
123
|
-
|
|
125
|
+
Full docs, tutorials, and API reference: **https://natural-language-query-language.github.io/python-nlql/en/**
|
|
124
126
|
|
|
125
|
-
- [
|
|
126
|
-
- [
|
|
127
|
-
- [API
|
|
128
|
-
- [
|
|
127
|
+
- [Quick start](https://natural-language-query-language.github.io/python-nlql/en/content/tutorials/quickstart/)
|
|
128
|
+
- [Design](https://natural-language-query-language.github.io/python-nlql/en/content/concepts/overview/)
|
|
129
|
+
- [API reference](https://natural-language-query-language.github.io/python-nlql/en/reference/sdk/)
|
|
130
|
+
- [中文文档](https://natural-language-query-language.github.io/python-nlql/)
|
|
129
131
|
|
|
130
|
-
|
|
132
|
+
More examples in the [`examples/`](examples/) directory.
|
|
131
133
|
|
|
132
134
|
## License
|
|
133
135
|
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# NLQL
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/python-nlql/)
|
|
4
|
+
[](https://www.python.org/downloads/)
|
|
5
|
+
[](LICENSE)
|
|
6
|
+
[](https://natural-language-query-language.github.io/python-nlql/)
|
|
7
|
+
|
|
8
|
+
**English** · [简体中文](README.zh-CN.md) · [在线文档](https://natural-language-query-language.github.io/python-nlql/)
|
|
9
|
+
|
|
10
|
+
NLQL lets you do semantic search with SQL-style statements. Relevance scoring, filtering, and sorting live in one query — no more scattered embedding calls and post-processing code.
|
|
11
|
+
|
|
12
|
+
Built for Agent and RAG applications: the query itself is structured data, usable directly as an LLM tool-call payload.
|
|
13
|
+
|
|
14
|
+
## What it looks like
|
|
15
|
+
|
|
16
|
+
```python
|
|
17
|
+
import nlql
|
|
18
|
+
|
|
19
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder()) # or OpenAIEmbedder, or any Embedder
|
|
20
|
+
engine.add_text("AI agents plan tasks and call tools.", metadata={"status": "published"})
|
|
21
|
+
engine.add_text("Banana bread needs flour and sugar.", metadata={"status": "draft"})
|
|
22
|
+
|
|
23
|
+
for unit in engine.search('''
|
|
24
|
+
SELECT SENTENCE
|
|
25
|
+
LET rel = SIMILARITY(content, "autonomous agents")
|
|
26
|
+
WHERE rel >= 0.2 AND meta.status == "published"
|
|
27
|
+
ORDER BY rel DESC
|
|
28
|
+
LIMIT 5
|
|
29
|
+
'''):
|
|
30
|
+
print(f"{unit.scores['rel']:+.3f} {unit.content}")
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
The statement reads almost like SQL: `SELECT` sets the return granularity, `LET` computes relevance, `WHERE` filters, `ORDER BY` / `LIMIT` sort and cap.
|
|
34
|
+
|
|
35
|
+
## Features
|
|
36
|
+
|
|
37
|
+
- **One statement, full intent** — relevance, filtering, and sorting in one place, not scattered across business code
|
|
38
|
+
- **Three ways to write, identical results** — SQL statement, Python chained builder, or JSON IR; all compile to the same internal representation
|
|
39
|
+
- **Pluggable backends** — built-in store works out of the box; switch to Qdrant / Faiss / Chroma / HnswLib / pgvector with one line
|
|
40
|
+
- **Two-stage retrieval** — attach a reranker after recall for higher accuracy
|
|
41
|
+
- **Multimodal** — text and images share one vector space; retrieve images with text
|
|
42
|
+
- **Explainable** — `engine.explain()` prints the query plan
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install python-nlql
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Optional extras:
|
|
51
|
+
|
|
52
|
+
| Command | Purpose |
|
|
53
|
+
|---|---|
|
|
54
|
+
| `pip install "python-nlql[faiss]"` | Faiss backend |
|
|
55
|
+
| `pip install "python-nlql[hnsw]"` | HnswLib backend (for large-scale data) |
|
|
56
|
+
| `pip install "python-nlql[qdrant]"` | Qdrant backend |
|
|
57
|
+
| `pip install "python-nlql[chroma]"` | Chroma backend |
|
|
58
|
+
| `pip install "python-nlql[pgvector]"` | Postgres + pgvector backend |
|
|
59
|
+
| `pip install "python-nlql[local]"` | local sentence-transformers / CLIP / cross-encoder |
|
|
60
|
+
| `pip install "python-nlql[loaders]"` | DOCX / PDF file loaders |
|
|
61
|
+
|
|
62
|
+
## Switching backends
|
|
63
|
+
|
|
64
|
+
One line; ingestion and query code stay the same:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from nlql.store.qdrant_store import QdrantStore
|
|
68
|
+
engine = nlql.Engine(embedder, store=QdrantStore(location=":memory:"))
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Documentation
|
|
72
|
+
|
|
73
|
+
Full docs, tutorials, and API reference: **https://natural-language-query-language.github.io/python-nlql/en/**
|
|
74
|
+
|
|
75
|
+
- [Quick start](https://natural-language-query-language.github.io/python-nlql/en/content/tutorials/quickstart/)
|
|
76
|
+
- [Design](https://natural-language-query-language.github.io/python-nlql/en/content/concepts/overview/)
|
|
77
|
+
- [API reference](https://natural-language-query-language.github.io/python-nlql/en/reference/sdk/)
|
|
78
|
+
- [中文文档](https://natural-language-query-language.github.io/python-nlql/)
|
|
79
|
+
|
|
80
|
+
More examples in the [`examples/`](examples/) directory.
|
|
81
|
+
|
|
82
|
+
## License
|
|
83
|
+
|
|
84
|
+
[MIT](LICENSE)
|
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
[](LICENSE)
|
|
6
6
|
[](https://natural-language-query-language.github.io/python-nlql/)
|
|
7
7
|
|
|
8
|
+
[English](README.md) · **简体中文** · [在线文档](https://natural-language-query-language.github.io/python-nlql/)
|
|
9
|
+
|
|
8
10
|
NLQL 让你用类似 SQL 的语句做语义检索。把"从文本里找相关内容"这件事,变得像查数据库一样直接——相关度计算、过滤、排序写在一条查询里,不再需要拼凑 embedding 调用和后处理代码。
|
|
9
11
|
|
|
10
12
|
适合 Agent 与 RAG 应用:查询本身就是结构化数据,可以直接作为大模型的工具调用载体。
|
|
@@ -14,7 +16,7 @@ NLQL 让你用类似 SQL 的语句做语义检索。把"从文本里找相关内
|
|
|
14
16
|
```python
|
|
15
17
|
import nlql
|
|
16
18
|
|
|
17
|
-
engine = nlql.Engine(nlql.FakeEmbedder()) # 或 OpenAIEmbedder,以及任意 Embedder 实现
|
|
19
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder()) # 或 OpenAIEmbedder,以及任意 Embedder 实现
|
|
18
20
|
engine.add_text("AI agents plan tasks and call tools.", metadata={"status": "published"})
|
|
19
21
|
engine.add_text("Banana bread needs flour and sugar.", metadata={"status": "draft"})
|
|
20
22
|
|
|
@@ -5,7 +5,7 @@ Vectors are computed at ingestion time and stored in the index; they are not rec
|
|
|
5
5
|
```python
|
|
6
6
|
import nlql
|
|
7
7
|
|
|
8
|
-
engine = nlql.Engine(nlql.OpenAIEmbedder(base_url="...", api_key="..."))
|
|
8
|
+
engine = nlql.Engine(nlql.embed.OpenAIEmbedder(base_url="...", api_key="..."))
|
|
9
9
|
engine.add_text("AI agents plan tasks and call external tools.")
|
|
10
10
|
engine.add_text("Banana bread is a quick loaf made with ripe bananas.")
|
|
11
11
|
|
|
@@ -5,7 +5,7 @@ When a document enters NLQL it passes through four stages in order: normalize, s
|
|
|
5
5
|
```python
|
|
6
6
|
import nlql
|
|
7
7
|
|
|
8
|
-
engine = nlql.Engine(nlql.OpenAIEmbedder(base_url="...", api_key="..."))
|
|
8
|
+
engine = nlql.Engine(nlql.embed.OpenAIEmbedder(base_url="...", api_key="..."))
|
|
9
9
|
doc_id = engine.add_text(
|
|
10
10
|
"AI agents plan tasks. They keep memory and call external tools.",
|
|
11
11
|
metadata={"status": "published", "year": 2026},
|
|
@@ -26,8 +26,8 @@ Before splitting, text is normalized: whitespace and line breaks are unified, an
|
|
|
26
26
|
The normalized text is sliced into units by the splitter for the active granularity. The default is by sentence (`SENTENCE`), and the built-in splitter covers Chinese, English, Japanese, and CJK punctuation.
|
|
27
27
|
|
|
28
28
|
```python
|
|
29
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), granularity="sentence") # default
|
|
30
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), granularity="chunk") # use the chunk splitter instead
|
|
29
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), granularity="sentence") # default
|
|
30
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), granularity="chunk") # use the chunk splitter instead
|
|
31
31
|
```
|
|
32
32
|
|
|
33
33
|
Splitting happens at ingest and is reused at query time — the boundaries returned by `SELECT SENTENCE` and `SELECT SPAN(SENTENCE, window => n)` both come from this stage; there is no on-the-fly re-splitting at query time.
|
|
@@ -64,7 +64,7 @@ engine.add_documents([
|
|
|
64
64
|
```python
|
|
65
65
|
import nlql
|
|
66
66
|
|
|
67
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
67
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
68
68
|
ids = engine.add_files(["agents.txt", "rag.md"])
|
|
69
69
|
print(f"loaded into {len(engine)} units: {ids}")
|
|
70
70
|
```
|
|
@@ -78,7 +78,7 @@ print(f"loaded into {len(engine)} units: {ids}")
|
|
|
78
78
|
- **Custom granularity** — register your own splitter (see [Registry and Extension](./registry.md)), for example by paragraph or by chapter.
|
|
79
79
|
|
|
80
80
|
```python
|
|
81
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), granularity="chunk")
|
|
81
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), granularity="chunk")
|
|
82
82
|
engine.add_file("long_document.md")
|
|
83
83
|
# each chunk is one retrieval unit
|
|
84
84
|
```
|
|
@@ -50,7 +50,7 @@ def my_fn(text: str) -> float: ...
|
|
|
50
50
|
**Instance-level registration** — applies only to the current engine and does not leak to other instances:
|
|
51
51
|
|
|
52
52
|
```python
|
|
53
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
53
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
54
54
|
|
|
55
55
|
@engine.register_function("TEMP_SCORE")
|
|
56
56
|
def temp_score(text: str) -> float: ...
|
|
@@ -71,7 +71,7 @@ def pysbd_sentences(text: str) -> list[str]:
|
|
|
71
71
|
seg = pysbd.Segmenter(language="en", clean=False)
|
|
72
72
|
return seg.segment(text)
|
|
73
73
|
|
|
74
|
-
engine = nlql.Engine(nlql.FakeEmbedder()) # the splitter above is used automatically at ingest
|
|
74
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder()) # the splitter above is used automatically at ingest
|
|
75
75
|
```
|
|
76
76
|
|
|
77
77
|
The same mechanism uses the splitter at both ingest and query time, so the boundaries returned by `SELECT SENTENCE` / `SELECT SPAN(SENTENCE, window => n)` match those from ingestion — there is no mismatch from re-splitting on the fly at query time.
|
|
@@ -83,7 +83,7 @@ You can also register a new granularity name (such as `"paragraph"`) and specify
|
|
|
83
83
|
def split_paragraphs(text: str) -> list[str]:
|
|
84
84
|
return [p for p in text.split("\n\n") if p.strip()]
|
|
85
85
|
|
|
86
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), granularity="paragraph")
|
|
86
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), granularity="paragraph")
|
|
87
87
|
```
|
|
88
88
|
|
|
89
89
|
## Custom Embedders
|
|
@@ -25,7 +25,7 @@ files = [tmp / "agents.txt", tmp / "rag.md"]
|
|
|
25
25
|
`add_files` takes a list of paths and dispatches a loader per file based on its extension. `.txt` and `.md` go through the plain-text loader, which works out of the box.
|
|
26
26
|
|
|
27
27
|
```python
|
|
28
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
28
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
29
29
|
ids = engine.add_files([str(f) for f in files])
|
|
30
30
|
print(f"loaded {len(ids)} files -> {len(engine)} sentence units: {ids}")
|
|
31
31
|
```
|
|
@@ -53,7 +53,7 @@ try:
|
|
|
53
53
|
except ImportError:
|
|
54
54
|
print("(python-docx not installed — skipping the .docx file)")
|
|
55
55
|
|
|
56
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
56
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
57
57
|
ids = engine.add_files([str(f) for f in files])
|
|
58
58
|
```
|
|
59
59
|
|
|
@@ -9,7 +9,8 @@ All backends conform to the same `Store` interface. The engine preferentially us
|
|
|
9
9
|
## Example
|
|
10
10
|
|
|
11
11
|
```python
|
|
12
|
-
from nlql import Document, Engine
|
|
12
|
+
from nlql import Document, Engine
|
|
13
|
+
from nlql.embed import FakeEmbedder
|
|
13
14
|
from nlql.store import LocalStore
|
|
14
15
|
|
|
15
16
|
CORPUS = [
|
|
@@ -8,7 +8,7 @@ The following example uses `FakeEmbedder`, which requires no network access or m
|
|
|
8
8
|
import json
|
|
9
9
|
import nlql
|
|
10
10
|
|
|
11
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
11
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
12
12
|
engine.add_text("AI agents use planning, memory, and tool use.",
|
|
13
13
|
metadata={"status": "published"})
|
|
14
14
|
engine.add_text("Vector databases store embeddings for similarity search.",
|
|
@@ -8,7 +8,7 @@ The following example uses `FakeEmbedder`, which requires no network access or m
|
|
|
8
8
|
import nlql
|
|
9
9
|
from nlql.sdk.builder import select, similarity, Meta, F
|
|
10
10
|
|
|
11
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
11
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
12
12
|
engine.add_text("AI agents plan tasks, keep memory, and call external tools.",
|
|
13
13
|
id="doc-0", metadata={"status": "published", "topic": "agents"})
|
|
14
14
|
engine.add_text("Retrieval-augmented generation grounds LLM answers in your documents.",
|
|
@@ -10,7 +10,7 @@ The following example uses `FakeEmbedder`, which requires no network access or m
|
|
|
10
10
|
```python
|
|
11
11
|
import nlql
|
|
12
12
|
|
|
13
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
13
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
14
14
|
|
|
15
15
|
engine.add_text("AI agents plan tasks, keep memory, and call external tools.",
|
|
16
16
|
id="doc-0", metadata={"status": "published", "topic": "agents"})
|
|
@@ -6,7 +6,7 @@ This example uses `FakeEmbedder` and `FakeReranker` for an offline demonstration
|
|
|
6
6
|
|
|
7
7
|
```python
|
|
8
8
|
import nlql
|
|
9
|
-
from nlql import FakeReranker
|
|
9
|
+
from nlql.rerank import FakeReranker
|
|
10
10
|
|
|
11
11
|
DOCS = [
|
|
12
12
|
# Contains all query terms but is very long; dual-encoder similarity gets diluted
|
|
@@ -20,7 +20,7 @@ QUERY = 'SELECT SENTENCE LET rel = SIMILARITY(content, "agent memory planning to
|
|
|
20
20
|
## Without a reranker
|
|
21
21
|
|
|
22
22
|
```python
|
|
23
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), reranker=None, rerank_factor=10)
|
|
23
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), reranker=None, rerank_factor=10)
|
|
24
24
|
for text, doc_id in DOCS:
|
|
25
25
|
engine.add_text(text, id=doc_id)
|
|
26
26
|
|
|
@@ -34,7 +34,7 @@ The `full` document covers every query term, but because the sentence is long, i
|
|
|
34
34
|
## With a reranker
|
|
35
35
|
|
|
36
36
|
```python
|
|
37
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), reranker=FakeReranker(), rerank_factor=10)
|
|
37
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), reranker=FakeReranker(), rerank_factor=10)
|
|
38
38
|
for text, doc_id in DOCS:
|
|
39
39
|
engine.add_text(text, id=doc_id)
|
|
40
40
|
|
|
@@ -52,7 +52,7 @@ The `Reranker` protocol requires `rerank(query, units) -> units`: it takes the q
|
|
|
52
52
|
`rerank_factor` controls the over-fetch multiple: the final `limit` multiplied by this factor gives the recall count. A larger factor yields more complete recall and leans more on the reranker for precision, but is also slower. Common values range from 5 to 20.
|
|
53
53
|
|
|
54
54
|
```python
|
|
55
|
-
nlql.Engine(nlql.OpenAIEmbedder(), reranker=FakeReranker(), rerank_factor=5)
|
|
55
|
+
nlql.Engine(nlql.embed.OpenAIEmbedder(), reranker=FakeReranker(), rerank_factor=5)
|
|
56
56
|
```
|
|
57
57
|
|
|
58
58
|
## CrossEncoder for production
|
|
@@ -60,10 +60,10 @@ nlql.Engine(nlql.OpenAIEmbedder(), reranker=FakeReranker(), rerank_factor=5)
|
|
|
60
60
|
`FakeReranker` is for demonstration only. In production, replace it with a real reranker:
|
|
61
61
|
|
|
62
62
|
```python
|
|
63
|
-
from nlql import CrossEncoderReranker
|
|
63
|
+
from nlql.rerank import CrossEncoderReranker
|
|
64
64
|
|
|
65
65
|
engine = nlql.Engine(
|
|
66
|
-
nlql.OpenAIEmbedder(),
|
|
66
|
+
nlql.embed.OpenAIEmbedder(),
|
|
67
67
|
reranker=CrossEncoderReranker(model="cross-encoder/ms-marco-MiniLM-L-6-v2"),
|
|
68
68
|
rerank_factor=5,
|
|
69
69
|
)
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
```python
|
|
6
6
|
import nlql
|
|
7
7
|
|
|
8
|
-
engine = nlql.Engine(nlql.OpenAIEmbedder(base_url="...", api_key="..."))
|
|
8
|
+
engine = nlql.Engine(nlql.embed.OpenAIEmbedder(base_url="...", api_key="..."))
|
|
9
9
|
engine.add_text("AI agents plan tasks and call external tools.")
|
|
10
10
|
engine.add_text("Banana bread is a quick loaf made with ripe bananas.")
|
|
11
11
|
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
```python
|
|
6
6
|
import nlql
|
|
7
7
|
|
|
8
|
-
engine = nlql.Engine(nlql.OpenAIEmbedder(base_url="...", api_key="..."))
|
|
8
|
+
engine = nlql.Engine(nlql.embed.OpenAIEmbedder(base_url="...", api_key="..."))
|
|
9
9
|
doc_id = engine.add_text(
|
|
10
10
|
"AI agents plan tasks. They keep memory and call external tools.",
|
|
11
11
|
metadata={"status": "published", "year": 2026},
|
|
@@ -26,8 +26,8 @@ results = engine.search(
|
|
|
26
26
|
规整后的文本按当前粒度对应的分词器切成单元。默认按句切(`SENTENCE`),内置分词器覆盖中、英、日及 CJK 标点。
|
|
27
27
|
|
|
28
28
|
```python
|
|
29
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), granularity="sentence") # 默认
|
|
30
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), granularity="chunk") # 改用 chunk 分词器
|
|
29
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), granularity="sentence") # 默认
|
|
30
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), granularity="chunk") # 改用 chunk 分词器
|
|
31
31
|
```
|
|
32
32
|
|
|
33
33
|
切分在写入时完成、查询时复用——`SELECT SENTENCE` 与 `SELECT SPAN(SENTENCE, window => n)` 返回的边界都来自这一步,不会出现查询时临时重切。
|
|
@@ -64,7 +64,7 @@ engine.add_documents([
|
|
|
64
64
|
```python
|
|
65
65
|
import nlql
|
|
66
66
|
|
|
67
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
67
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
68
68
|
ids = engine.add_files(["agents.txt", "rag.md"])
|
|
69
69
|
print(f"loaded into {len(engine)} units: {ids}")
|
|
70
70
|
```
|
|
@@ -78,7 +78,7 @@ print(f"loaded into {len(engine)} units: {ids}")
|
|
|
78
78
|
- **自定义粒度** —— 注册自己的分词器即可(见 [注册与扩展](./registry.md)),比如按段落、按章节。
|
|
79
79
|
|
|
80
80
|
```python
|
|
81
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), granularity="chunk")
|
|
81
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), granularity="chunk")
|
|
82
82
|
engine.add_file("long_document.md")
|
|
83
83
|
# 每个 chunk 是一个检索单元
|
|
84
84
|
```
|
|
@@ -50,7 +50,7 @@ def my_fn(text: str) -> float: ...
|
|
|
50
50
|
**实例级注册**——只对当前引擎生效,不泄漏到其它实例:
|
|
51
51
|
|
|
52
52
|
```python
|
|
53
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
53
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
54
54
|
|
|
55
55
|
@engine.register_function("TEMP_SCORE")
|
|
56
56
|
def temp_score(text: str) -> float: ...
|
|
@@ -71,7 +71,7 @@ def pysbd_sentences(text: str) -> list[str]:
|
|
|
71
71
|
seg = pysbd.Segmenter(language="en", clean=False)
|
|
72
72
|
return seg.segment(text)
|
|
73
73
|
|
|
74
|
-
engine = nlql.Engine(nlql.FakeEmbedder()) # 写入时自动用上面的分词器
|
|
74
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder()) # 写入时自动用上面的分词器
|
|
75
75
|
```
|
|
76
76
|
|
|
77
77
|
分词器在写入和查询时被同一套机制使用,因此 `SELECT SENTENCE` / `SELECT SPAN(SENTENCE, window => n)` 返回的边界与写入时一致,不会出现查询时临时重切导致的不匹配。
|
|
@@ -83,7 +83,7 @@ engine = nlql.Engine(nlql.FakeEmbedder()) # 写入时自动用上面的分词
|
|
|
83
83
|
def split_paragraphs(text: str) -> list[str]:
|
|
84
84
|
return [p for p in text.split("\n\n") if p.strip()]
|
|
85
85
|
|
|
86
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), granularity="paragraph")
|
|
86
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), granularity="paragraph")
|
|
87
87
|
```
|
|
88
88
|
|
|
89
89
|
## 自定义 embedder
|
|
@@ -25,7 +25,7 @@ files = [tmp / "agents.txt", tmp / "rag.md"]
|
|
|
25
25
|
`add_files` 接收路径列表,内部对每个文件按扩展名分派加载器。`.txt` 与 `.md` 走纯文本加载器,开箱即用。
|
|
26
26
|
|
|
27
27
|
```python
|
|
28
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
28
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
29
29
|
ids = engine.add_files([str(f) for f in files])
|
|
30
30
|
print(f"loaded {len(ids)} files -> {len(engine)} sentence units: {ids}")
|
|
31
31
|
```
|
|
@@ -53,7 +53,7 @@ try:
|
|
|
53
53
|
except ImportError:
|
|
54
54
|
print("(python-docx not installed — skipping the .docx file)")
|
|
55
55
|
|
|
56
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
56
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
57
57
|
ids = engine.add_files([str(f) for f in files])
|
|
58
58
|
```
|
|
59
59
|
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
import json
|
|
9
9
|
import nlql
|
|
10
10
|
|
|
11
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
11
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
12
12
|
engine.add_text("AI agents use planning, memory, and tool use.",
|
|
13
13
|
metadata={"status": "published"})
|
|
14
14
|
engine.add_text("Vector databases store embeddings for similarity search.",
|
|
@@ -8,7 +8,7 @@ Query Builder 用 Python 链式调用构造查询,与 NLQL 字符串编译到
|
|
|
8
8
|
import nlql
|
|
9
9
|
from nlql.sdk.builder import select, similarity, Meta, F
|
|
10
10
|
|
|
11
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
11
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
12
12
|
engine.add_text("AI agents plan tasks, keep memory, and call external tools.",
|
|
13
13
|
id="doc-0", metadata={"status": "published", "topic": "agents"})
|
|
14
14
|
engine.add_text("Retrieval-augmented generation grounds LLM answers in your documents.",
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
```python
|
|
11
11
|
import nlql
|
|
12
12
|
|
|
13
|
-
engine = nlql.Engine(nlql.FakeEmbedder())
|
|
13
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder())
|
|
14
14
|
|
|
15
15
|
engine.add_text("AI agents plan tasks, keep memory, and call external tools.",
|
|
16
16
|
id="doc-0", metadata={"status": "published", "topic": "agents"})
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
```python
|
|
8
8
|
import nlql
|
|
9
|
-
from nlql import FakeReranker
|
|
9
|
+
from nlql.rerank import FakeReranker
|
|
10
10
|
|
|
11
11
|
DOCS = [
|
|
12
12
|
# 包含全部查询词但很长,双塔相似度被稀释
|
|
@@ -20,7 +20,7 @@ QUERY = 'SELECT SENTENCE LET rel = SIMILARITY(content, "agent memory planning to
|
|
|
20
20
|
## 不加重排器
|
|
21
21
|
|
|
22
22
|
```python
|
|
23
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), reranker=None, rerank_factor=10)
|
|
23
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), reranker=None, rerank_factor=10)
|
|
24
24
|
for text, doc_id in DOCS:
|
|
25
25
|
engine.add_text(text, id=doc_id)
|
|
26
26
|
|
|
@@ -34,7 +34,7 @@ for unit in engine.search(QUERY):
|
|
|
34
34
|
## 加重排器
|
|
35
35
|
|
|
36
36
|
```python
|
|
37
|
-
engine = nlql.Engine(nlql.FakeEmbedder(), reranker=FakeReranker(), rerank_factor=10)
|
|
37
|
+
engine = nlql.Engine(nlql.embed.FakeEmbedder(), reranker=FakeReranker(), rerank_factor=10)
|
|
38
38
|
for text, doc_id in DOCS:
|
|
39
39
|
engine.add_text(text, id=doc_id)
|
|
40
40
|
|
|
@@ -52,7 +52,7 @@ for unit in engine.search(QUERY):
|
|
|
52
52
|
`rerank_factor` 控制过取倍数:最终需要的 `limit` 乘以这个倍数得到召回数量。倍数越大召回越全、精度越依赖重排器,但也越慢。常用值在 5 到 20 之间。
|
|
53
53
|
|
|
54
54
|
```python
|
|
55
|
-
nlql.Engine(nlql.OpenAIEmbedder(), reranker=FakeReranker(), rerank_factor=5)
|
|
55
|
+
nlql.Engine(nlql.embed.OpenAIEmbedder(), reranker=FakeReranker(), rerank_factor=5)
|
|
56
56
|
```
|
|
57
57
|
|
|
58
58
|
## 生产用 CrossEncoder
|
|
@@ -60,10 +60,10 @@ nlql.Engine(nlql.OpenAIEmbedder(), reranker=FakeReranker(), rerank_factor=5)
|
|
|
60
60
|
`FakeReranker` 仅用于演示。生产中替换为真实重排器:
|
|
61
61
|
|
|
62
62
|
```python
|
|
63
|
-
from nlql import CrossEncoderReranker
|
|
63
|
+
from nlql.rerank import CrossEncoderReranker
|
|
64
64
|
|
|
65
65
|
engine = nlql.Engine(
|
|
66
|
-
nlql.OpenAIEmbedder(),
|
|
66
|
+
nlql.embed.OpenAIEmbedder(),
|
|
67
67
|
reranker=CrossEncoderReranker(model="cross-encoder/ms-marco-MiniLM-L-6-v2"),
|
|
68
68
|
rerank_factor=5,
|
|
69
69
|
)
|