heavenbase 0.1.0.1.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- heavenbase/__init__.py +154 -0
- heavenbase/_bootstrap.py +90 -0
- heavenbase/backends/__init__.py +80 -0
- heavenbase/backends/base.py +280 -0
- heavenbase/backends/elasticsearch/__init__.py +7 -0
- heavenbase/backends/elasticsearch/backend.py +228 -0
- heavenbase/backends/families.py +42 -0
- heavenbase/backends/file/__init__.py +11 -0
- heavenbase/backends/file/base.py +210 -0
- heavenbase/backends/file/json.py +22 -0
- heavenbase/backends/file/pickle.py +22 -0
- heavenbase/backends/inmem/__init__.py +7 -0
- heavenbase/backends/inmem/backend.py +67 -0
- heavenbase/backends/registry.py +391 -0
- heavenbase/backends/sql/__init__.py +25 -0
- heavenbase/backends/sql/base.py +637 -0
- heavenbase/backends/sql/duckdb.py +18 -0
- heavenbase/backends/sql/mssql.py +22 -0
- heavenbase/backends/sql/mysql.py +18 -0
- heavenbase/backends/sql/oceanbase.py +17 -0
- heavenbase/backends/sql/oracle.py +18 -0
- heavenbase/backends/sql/postgres.py +18 -0
- heavenbase/backends/sql/sqlite.py +18 -0
- heavenbase/backends/sql/starrocks.py +130 -0
- heavenbase/backends/sql/trino.py +17 -0
- heavenbase/backends/type_registry.py +163 -0
- heavenbase/backends/vector/__init__.py +15 -0
- heavenbase/backends/vector/chroma.py +359 -0
- heavenbase/backends/vector/lance.py +282 -0
- heavenbase/backends/vector/milvus.py +559 -0
- heavenbase/backends/vector/pgvector.py +82 -0
- heavenbase/backends/vector/pinecone.py +398 -0
- heavenbase/benchmark/__init__.py +46 -0
- heavenbase/benchmark/model.py +235 -0
- heavenbase/benchmark/report.py +237 -0
- heavenbase/benchmark/runner.py +473 -0
- heavenbase/benchmark/scenario.py +281 -0
- heavenbase/capsule/__init__.py +32 -0
- heavenbase/capsule/capture.py +311 -0
- heavenbase/capsule/docstring.py +57 -0
- heavenbase/capsule/errors.py +28 -0
- heavenbase/capsule/manifest.py +90 -0
- heavenbase/capsule/registry.py +448 -0
- heavenbase/capsule/runtime.py +515 -0
- heavenbase/capsule/schema.py +38 -0
- heavenbase/capsule/serialize.py +66 -0
- heavenbase/capsule/signature.py +128 -0
- heavenbase/cli/__init__.py +27 -0
- heavenbase/cli/app.py +93 -0
- heavenbase/cli/backends/__init__.py +9 -0
- heavenbase/cli/backends/argparse.py +96 -0
- heavenbase/cli/backends/click.py +87 -0
- heavenbase/cli/backends/typer.py +150 -0
- heavenbase/cli/groups/__init__.py +12 -0
- heavenbase/cli/groups/config.py +189 -0
- heavenbase/cli/groups/llm.py +780 -0
- heavenbase/cli/groups/mcp.py +295 -0
- heavenbase/cli/groups/prompt.py +165 -0
- heavenbase/cli/groups/root.py +34 -0
- heavenbase/cli/groups/ws.py +157 -0
- heavenbase/cli/output.py +59 -0
- heavenbase/cli/registry.py +118 -0
- heavenbase/cli/spec.py +143 -0
- heavenbase/discovery/__init__.py +258 -0
- heavenbase/entity/__init__.py +20 -0
- heavenbase/entity/base.py +210 -0
- heavenbase/entity/compiler.py +235 -0
- heavenbase/entity/fields.py +218 -0
- heavenbase/entity/ref.py +88 -0
- heavenbase/entity/system/__init__.py +11 -0
- heavenbase/entity/system/catalog.py +75 -0
- heavenbase/entity/system/config.py +28 -0
- heavenbase/entity/system/metaschema.py +60 -0
- heavenbase/ext.py +142 -0
- heavenbase/extensions/__init__.py +23 -0
- heavenbase/extensions/base.py +156 -0
- heavenbase/extensions/builtin.py +51 -0
- heavenbase/frame/__init__.py +222 -0
- heavenbase/handlers/__init__.py +50 -0
- heavenbase/handlers/filters.py +238 -0
- heavenbase/handlers/plugins.py +88 -0
- heavenbase/handlers/reasons.py +97 -0
- heavenbase/handlers/registry.py +58 -0
- heavenbase/handlers/search/__init__.py +7 -0
- heavenbase/handlers/search/elasticsearch.py +97 -0
- heavenbase/handlers/seed.py +135 -0
- heavenbase/handlers/sql/__init__.py +10 -0
- heavenbase/handlers/sql/base.py +223 -0
- heavenbase/handlers/vector/__init__.py +21 -0
- heavenbase/handlers/vector/base.py +28 -0
- heavenbase/handlers/vector/chroma.py +99 -0
- heavenbase/handlers/vector/lance.py +98 -0
- heavenbase/handlers/vector/milvus.py +114 -0
- heavenbase/handlers/vector/pgvector.py +53 -0
- heavenbase/handlers/vector/pinecone.py +109 -0
- heavenbase/interop/__init__.py +60 -0
- heavenbase/interop/base.py +243 -0
- heavenbase/interop/daft.py +70 -0
- heavenbase/interop/export.py +79 -0
- heavenbase/interop/mapping.py +304 -0
- heavenbase/interop/numpy.py +47 -0
- heavenbase/interop/pandas.py +52 -0
- heavenbase/interop/pyarrow.py +49 -0
- heavenbase/interop/pydantic.py +108 -0
- heavenbase/interop/sql.py +47 -0
- heavenbase/mcp/__init__.py +42 -0
- heavenbase/prompt/__init__.py +18 -0
- heavenbase/prompt/compose.py +117 -0
- heavenbase/prompt/name.py +85 -0
- heavenbase/prompt/prompt.py +582 -0
- heavenbase/prompt/translation.py +350 -0
- heavenbase/query/__init__.py +386 -0
- heavenbase/query/_planner.py +60 -0
- heavenbase/registry/__init__.py +11 -0
- heavenbase/registry/base.py +84 -0
- heavenbase/registry/workspace.py +270 -0
- heavenbase/resources/__init__.py +8 -0
- heavenbase/resources/configs/bootstrap.yaml +178 -0
- heavenbase/resources/configs/default.yaml +957 -0
- heavenbase/resources/prompts.py +85 -0
- heavenbase/resources/sql.py +250 -0
- heavenbase/schema/__init__.py +165 -0
- heavenbase/storage/__init__.py +138 -0
- heavenbase/storage/profiles.py +96 -0
- heavenbase/strategies/__init__.py +32 -0
- heavenbase/strategies/base.py +23 -0
- heavenbase/strategies/external_ref.py +13 -0
- heavenbase/strategies/graph_edge.py +13 -0
- heavenbase/strategies/inline_column.py +13 -0
- heavenbase/strategies/inverted_index.py +13 -0
- heavenbase/strategies/json_field.py +13 -0
- heavenbase/strategies/registry.py +48 -0
- heavenbase/strategies/side_table.py +13 -0
- heavenbase/strategies/storage.py +80 -0
- heavenbase/strategies/vector_index.py +13 -0
- heavenbase/toolkit/__init__.py +29 -0
- heavenbase/toolkit/anthropic.py +47 -0
- heavenbase/toolkit/errors.py +13 -0
- heavenbase/toolkit/mcp.py +209 -0
- heavenbase/toolkit/server.py +66 -0
- heavenbase/toolkit/toolkit.py +522 -0
- heavenbase/toolkit/workspace_tools.py +757 -0
- heavenbase/types/__init__.py +97 -0
- heavenbase/types/array.py +79 -0
- heavenbase/types/artifact.py +89 -0
- heavenbase/types/base.py +263 -0
- heavenbase/types/boolean.py +68 -0
- heavenbase/types/categorical.py +146 -0
- heavenbase/types/date.py +88 -0
- heavenbase/types/families.py +77 -0
- heavenbase/types/float.py +21 -0
- heavenbase/types/hyperg.py +189 -0
- heavenbase/types/identifier.py +34 -0
- heavenbase/types/integer.py +21 -0
- heavenbase/types/json.py +85 -0
- heavenbase/types/limits.py +63 -0
- heavenbase/types/long_text.py +52 -0
- heavenbase/types/medium_text.py +52 -0
- heavenbase/types/short_text.py +52 -0
- heavenbase/types/timestamp.py +113 -0
- heavenbase/types/vector.py +86 -0
- heavenbase/utils/__init__.py +563 -0
- heavenbase/utils/cmd.py +213 -0
- heavenbase/utils/coerce.py +65 -0
- heavenbase/utils/color.py +371 -0
- heavenbase/utils/config.py +1645 -0
- heavenbase/utils/config_api.py +178 -0
- heavenbase/utils/containers.py +23 -0
- heavenbase/utils/db/__init__.py +60 -0
- heavenbase/utils/db/database.py +1219 -0
- heavenbase/utils/db/ddl.py +163 -0
- heavenbase/utils/db/dialects.py +42 -0
- heavenbase/utils/db/display.py +118 -0
- heavenbase/utils/db/processor.py +392 -0
- heavenbase/utils/db/readonly.py +66 -0
- heavenbase/utils/db/registry.py +199 -0
- heavenbase/utils/db/response.py +490 -0
- heavenbase/utils/db/spec.py +301 -0
- heavenbase/utils/db/types.py +185 -0
- heavenbase/utils/db_path.py +62 -0
- heavenbase/utils/debug.py +224 -0
- heavenbase/utils/files.py +723 -0
- heavenbase/utils/hash.py +143 -0
- heavenbase/utils/ids.py +264 -0
- heavenbase/utils/llm/__init__.py +32 -0
- heavenbase/utils/llm/adapters.py +923 -0
- heavenbase/utils/llm/base.py +1629 -0
- heavenbase/utils/llm/cache.py +493 -0
- heavenbase/utils/llm/image.py +257 -0
- heavenbase/utils/llm/mock.py +211 -0
- heavenbase/utils/llm/repair.py +197 -0
- heavenbase/utils/llm/response.py +165 -0
- heavenbase/utils/llm/session.py +195 -0
- heavenbase/utils/llm/spec.py +401 -0
- heavenbase/utils/log.py +604 -0
- heavenbase/utils/naming.py +77 -0
- heavenbase/utils/network.py +80 -0
- heavenbase/utils/ops.py +160 -0
- heavenbase/utils/parallel.py +477 -0
- heavenbase/utils/registry_identity.py +175 -0
- heavenbase/utils/rng.py +581 -0
- heavenbase/utils/serialize.py +791 -0
- heavenbase/utils/spec.py +190 -0
- heavenbase/utils/strings.py +622 -0
- heavenbase/utils/typing.py +63 -0
- heavenbase/version.py +7 -0
- heavenbase/workspace/__init__.py +2109 -0
- heavenbase/workspace/_crud.py +232 -0
- heavenbase/workspace/_query.py +53 -0
- heavenbase/workspace/_registry.py +51 -0
- heavenbase/workspace/_system.py +155 -0
- heavenbase/workspace/_writer.py +55 -0
- heavenbase/workspace/catalog.py +89 -0
- heavenbase/workspace/presets.py +103 -0
- heavenbase-0.1.0.1.dev0.dist-info/METADATA +377 -0
- heavenbase-0.1.0.1.dev0.dist-info/RECORD +220 -0
- heavenbase-0.1.0.1.dev0.dist-info/WHEEL +5 -0
- heavenbase-0.1.0.1.dev0.dist-info/entry_points.txt +2 -0
- heavenbase-0.1.0.1.dev0.dist-info/licenses/LICENSE +21 -0
- heavenbase-0.1.0.1.dev0.dist-info/top_level.txt +1 -0
heavenbase/__init__.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""HeavenBase public API.
|
|
2
|
+
|
|
3
|
+
The root package exposes user-facing workspace, entity, query, prompt, LLM,
|
|
4
|
+
Capsule, Toolkit, interop, grouped backend, and utility entry points. Backend
|
|
5
|
+
registry, handler, storage, and low-level extension contracts live under
|
|
6
|
+
``heavenbase.ext``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"__version__",
|
|
11
|
+
"Array",
|
|
12
|
+
"Artifact",
|
|
13
|
+
"Catalog",
|
|
14
|
+
"capabilities",
|
|
15
|
+
"backends",
|
|
16
|
+
"BackendType",
|
|
17
|
+
"Boolean",
|
|
18
|
+
"Categorical",
|
|
19
|
+
"Date",
|
|
20
|
+
"Entity",
|
|
21
|
+
"ExternalRef",
|
|
22
|
+
"Float",
|
|
23
|
+
"GraphEdge",
|
|
24
|
+
"HeavenBase",
|
|
25
|
+
"HyperG",
|
|
26
|
+
"Identifier",
|
|
27
|
+
"InlineColumn",
|
|
28
|
+
"Integer",
|
|
29
|
+
"InvertedIndex",
|
|
30
|
+
"Json",
|
|
31
|
+
"JsonField",
|
|
32
|
+
"LogicalType",
|
|
33
|
+
"LongText",
|
|
34
|
+
"MediumText",
|
|
35
|
+
"MetaSchema",
|
|
36
|
+
"Prompt",
|
|
37
|
+
"QueryBuilder",
|
|
38
|
+
"QuerySpec",
|
|
39
|
+
"ResultFrame",
|
|
40
|
+
"ShortText",
|
|
41
|
+
"SideTable",
|
|
42
|
+
"StorageStrategy",
|
|
43
|
+
"Timestamp",
|
|
44
|
+
"Vector",
|
|
45
|
+
"VectorIndex",
|
|
46
|
+
"field",
|
|
47
|
+
"fast_prompt_section",
|
|
48
|
+
"CM_HVNB",
|
|
49
|
+
"ConfigEngine",
|
|
50
|
+
"ConfigSpec",
|
|
51
|
+
"resolve_interpolations",
|
|
52
|
+
"Database",
|
|
53
|
+
"DBEngine",
|
|
54
|
+
"DBSpec",
|
|
55
|
+
"DB_ENGINE",
|
|
56
|
+
"DatabaseEngineRegistry",
|
|
57
|
+
"SQLResponse",
|
|
58
|
+
"StableRNG",
|
|
59
|
+
"dumps_json",
|
|
60
|
+
"loads_json",
|
|
61
|
+
"md_section",
|
|
62
|
+
"LLM",
|
|
63
|
+
"LLMCache",
|
|
64
|
+
"LLMEmbeddingEntry",
|
|
65
|
+
"LLMEngine",
|
|
66
|
+
"LLMImage",
|
|
67
|
+
"LLMImageEntry",
|
|
68
|
+
"LLMResponse",
|
|
69
|
+
"LLMSpec",
|
|
70
|
+
"LLMTextEntry",
|
|
71
|
+
"LLMToolCallRepair",
|
|
72
|
+
"Capsule",
|
|
73
|
+
"capsule_registry",
|
|
74
|
+
"Tool",
|
|
75
|
+
"Toolkit",
|
|
76
|
+
"InteropIssue",
|
|
77
|
+
"InteropReport",
|
|
78
|
+
"InteropSet",
|
|
79
|
+
"from_daft",
|
|
80
|
+
"from_numpy",
|
|
81
|
+
"from_pandas",
|
|
82
|
+
"from_pyarrow",
|
|
83
|
+
"from_pydantic",
|
|
84
|
+
"from_sql_result",
|
|
85
|
+
"register_interop_type_inferer",
|
|
86
|
+
"ext",
|
|
87
|
+
]
|
|
88
|
+
|
|
89
|
+
from heavenbase.utils.config import CM_HVNB
|
|
90
|
+
|
|
91
|
+
from .version import __version__
|
|
92
|
+
from ._bootstrap import bootstrap as _bootstrap
|
|
93
|
+
|
|
94
|
+
_bootstrap()
|
|
95
|
+
|
|
96
|
+
from . import backends as backends
|
|
97
|
+
from .backends import BackendType
|
|
98
|
+
from .discovery import capabilities
|
|
99
|
+
from .frame import ResultFrame
|
|
100
|
+
from .entity import Catalog, Entity, MetaSchema, field
|
|
101
|
+
from .interop import (
|
|
102
|
+
InteropIssue,
|
|
103
|
+
InteropReport,
|
|
104
|
+
InteropSet,
|
|
105
|
+
from_daft,
|
|
106
|
+
from_numpy,
|
|
107
|
+
from_pandas,
|
|
108
|
+
from_pydantic,
|
|
109
|
+
from_pyarrow,
|
|
110
|
+
from_sql_result,
|
|
111
|
+
register_interop_type_inferer,
|
|
112
|
+
)
|
|
113
|
+
from .utils.llm import LLM, LLMCache, LLMEmbeddingEntry, LLMEngine, LLMImage, LLMImageEntry, LLMResponse, LLMSpec, LLMTextEntry, LLMToolCallRepair
|
|
114
|
+
from .utils.db import Database, DatabaseEngineRegistry, DBEngine, DBSpec, DB_ENGINE, SQLResponse
|
|
115
|
+
from .utils.rng import StableRNG
|
|
116
|
+
from .utils.serialize import dumps_json, loads_json
|
|
117
|
+
from .utils.spec import ConfigEngine, ConfigSpec, resolve_interpolations
|
|
118
|
+
from .utils.strings import md_section
|
|
119
|
+
from .query import QueryBuilder, QuerySpec
|
|
120
|
+
from .strategies import (
|
|
121
|
+
ExternalRef,
|
|
122
|
+
GraphEdge,
|
|
123
|
+
InlineColumn,
|
|
124
|
+
InvertedIndex,
|
|
125
|
+
JsonField,
|
|
126
|
+
SideTable,
|
|
127
|
+
StorageStrategy,
|
|
128
|
+
VectorIndex,
|
|
129
|
+
)
|
|
130
|
+
from .types import (
|
|
131
|
+
Array,
|
|
132
|
+
Artifact,
|
|
133
|
+
Boolean,
|
|
134
|
+
Categorical,
|
|
135
|
+
Date,
|
|
136
|
+
Float,
|
|
137
|
+
HyperG,
|
|
138
|
+
Identifier,
|
|
139
|
+
Integer,
|
|
140
|
+
Json,
|
|
141
|
+
LogicalType,
|
|
142
|
+
LongText,
|
|
143
|
+
MediumText,
|
|
144
|
+
ShortText,
|
|
145
|
+
Timestamp,
|
|
146
|
+
Vector,
|
|
147
|
+
)
|
|
148
|
+
from .workspace import HeavenBase
|
|
149
|
+
from .capsule import Capsule, capsule_registry
|
|
150
|
+
from .toolkit import Tool, Toolkit
|
|
151
|
+
from .prompt import Prompt, fast_prompt_section
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
from . import ext as ext
|
heavenbase/_bootstrap.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Explicit bootstrap — centralised registration entry point.
|
|
2
|
+
|
|
3
|
+
Called once during ``import heavenbase``. Keeps module-level side effects
|
|
4
|
+
out of individual ``__init__.py`` files. Importing any ``heavenbase.*``
|
|
5
|
+
submodule loads the root package first and therefore runs bootstrap.
|
|
6
|
+
Tests can reset state by calling ``_bootstrap._reset()`` followed by
|
|
7
|
+
``_bootstrap.bootstrap()``.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
_BOOTSTRAPPED = False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def bootstrap() -> None:
    """Run the built-in registrations unless they have already run.

    Calls ``_register_types()`` and then ``_register_strategies()``. Once both
    have returned, the module-level ``_BOOTSTRAPPED`` flag is set and further
    calls do nothing until ``_reset()`` clears the flag.
    """
    global _BOOTSTRAPPED
    if not _BOOTSTRAPPED:
        _register_types()
        _register_strategies()
        # The flag is set last: if either registration step raises, the
        # module stays un-bootstrapped and a later call tries again.
        _BOOTSTRAPPED = True
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _reset() -> None:
    """Clear the bootstrapped flag so the next ``bootstrap()`` call runs again.

    Test helper. Only the module-level flag is changed here; this function
    does not remove anything that an earlier bootstrap registered.
    """
    global _BOOTSTRAPPED
    _BOOTSTRAPPED = False
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _register_types() -> None:
    """Register each built-in logical type with its family tags.

    The imports are done inside the function so they only happen when
    bootstrap actually runs. Types are registered one by one, in the order
    listed in the table below.
    """
    from .types import (
        TYPE_FAMILY_FILTER,
        TYPE_FAMILY_FILTER_ARRAY,
        TYPE_FAMILY_FILTER_BOOL,
        TYPE_FAMILY_FILTER_ORDER,
        TYPE_FAMILY_FILTER_SIDE,
        TYPE_FAMILY_FILTER_STRUCT,
        TYPE_FAMILY_FILTER_TEXT,
        TYPE_FAMILY_FILTER_VECTOR,
        TYPE_FAMILY_STORAGE_JSON,
        TYPE_FAMILY_STORAGE_LONG_TEXT,
        TYPE_FAMILY_STORAGE_SIDE,
        TYPE_FAMILY_STORAGE_VECTOR,
        register_logical_type,
    )
    from .types.artifact import Artifact
    from .types.array import Array
    from .types.boolean import Boolean
    from .types.categorical import Categorical
    from .types.date import Date
    from .types.float import Float
    from .types.hyperg import HyperG
    from .types.identifier import Identifier
    from .types.integer import Integer
    from .types.json import Json
    from .types.long_text import LongText
    from .types.medium_text import MediumText
    from .types.short_text import ShortText
    from .types.timestamp import Timestamp
    from .types.vector import Vector

    # (logical type, families) pairs in registration order. ``None`` means the
    # type is registered without passing a ``families`` argument at all.
    registrations = (
        (Artifact, None),
        (Array, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_ARRAY, TYPE_FAMILY_FILTER_SIDE, TYPE_FAMILY_STORAGE_SIDE)),
        (Boolean, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_BOOL)),
        (Categorical, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_TEXT, TYPE_FAMILY_FILTER_ORDER)),
        (Date, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_ORDER, TYPE_FAMILY_FILTER_TEXT)),
        (Float, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_ORDER)),
        (HyperG, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_STRUCT, TYPE_FAMILY_FILTER_SIDE, TYPE_FAMILY_STORAGE_SIDE)),
        (Identifier, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_TEXT)),
        (Integer, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_ORDER)),
        (Json, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_STRUCT, TYPE_FAMILY_STORAGE_JSON)),
        (LongText, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_TEXT, TYPE_FAMILY_STORAGE_LONG_TEXT)),
        (MediumText, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_TEXT)),
        (ShortText, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_TEXT)),
        (Timestamp, (TYPE_FAMILY_FILTER, TYPE_FAMILY_FILTER_ORDER)),
        (Vector, (TYPE_FAMILY_FILTER_VECTOR, TYPE_FAMILY_STORAGE_VECTOR)),
    )
    for logical_type, families in registrations:
        if families is None:
            register_logical_type(logical_type)
        else:
            register_logical_type(logical_type, families=families)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _register_strategies() -> None:
    """Register each built-in storage strategy with its family tags.

    ``ExternalRef`` and ``GraphEdge`` are registered without a ``families``
    argument; the remaining strategies are tagged with exactly one family.
    Registration order matches the order of the statements below.
    """
    from .strategies import (
        STRATEGY_FAMILY_FILTER,
        STRATEGY_FAMILY_SIDE_FILTER,
        STRATEGY_FAMILY_VECTOR,
        register_strategy,
    )
    from .strategies.base import (
        ExternalRef,
        GraphEdge,
        InlineColumn,
        InvertedIndex,
        JsonField,
        SideTable,
        VectorIndex,
    )

    for untagged in (ExternalRef, GraphEdge):
        register_strategy(untagged)
    for filterable in (InlineColumn, InvertedIndex, JsonField):
        register_strategy(filterable, families=(STRATEGY_FAMILY_FILTER,))
    register_strategy(SideTable, families=(STRATEGY_FAMILY_SIDE_FILTER,))
    register_strategy(VectorIndex, families=(STRATEGY_FAMILY_VECTOR,))
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Built-in backend implementations."""
|
|
2
|
+
|
|
3
|
+
__all__ = [
|
|
4
|
+
"Backend",
|
|
5
|
+
"BackendRegistry",
|
|
6
|
+
"BackendType",
|
|
7
|
+
"backend_families",
|
|
8
|
+
"backend_identifier_in_family",
|
|
9
|
+
"backend_identifiers_for",
|
|
10
|
+
"BACKEND_FAMILY_ALL",
|
|
11
|
+
"BACKEND_FAMILY_HANDLER",
|
|
12
|
+
"BACKEND_FAMILY_STORAGE_ROW",
|
|
13
|
+
"BACKEND_FAMILY_STORAGE_TEXT",
|
|
14
|
+
"BACKEND_FAMILY_STORAGE_VECTOR",
|
|
15
|
+
"BACKEND_FAMILY_SEARCH",
|
|
16
|
+
"BACKEND_FAMILY_SQL",
|
|
17
|
+
"build_backend",
|
|
18
|
+
"ChromaBackend",
|
|
19
|
+
"QueryFragment",
|
|
20
|
+
"DuckDBBackend",
|
|
21
|
+
"ElasticsearchBackend",
|
|
22
|
+
"FileBackend",
|
|
23
|
+
"InMemBackend",
|
|
24
|
+
"JsonFileBackend",
|
|
25
|
+
"LanceBackend",
|
|
26
|
+
"MilvusBackend",
|
|
27
|
+
"MsSQLBackend",
|
|
28
|
+
"MySQLBackend",
|
|
29
|
+
"OceanBaseBackend",
|
|
30
|
+
"OracleBackend",
|
|
31
|
+
"PgVectorBackend",
|
|
32
|
+
"PickleFileBackend",
|
|
33
|
+
"PineconeBackend",
|
|
34
|
+
"HandlerContext",
|
|
35
|
+
"PostgresBackend",
|
|
36
|
+
"RowOp",
|
|
37
|
+
"SQLBackend",
|
|
38
|
+
"ScanningBackend",
|
|
39
|
+
"SQLiteBackend",
|
|
40
|
+
"StarRocksBackend",
|
|
41
|
+
"TrinoBackend",
|
|
42
|
+
"get_backend_builder",
|
|
43
|
+
"load_configured_backends",
|
|
44
|
+
"register_backend_builder",
|
|
45
|
+
"register_backend_class",
|
|
46
|
+
"registered_backend_identifiers",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
from .base import Backend, QueryFragment, HandlerContext, RowOp, ScanningBackend
|
|
50
|
+
from .elasticsearch import ElasticsearchBackend
|
|
51
|
+
from .file import FileBackend, JsonFileBackend, PickleFileBackend
|
|
52
|
+
from .inmem import InMemBackend
|
|
53
|
+
from .families import (
|
|
54
|
+
backend_families,
|
|
55
|
+
backend_identifier_in_family,
|
|
56
|
+
backend_identifiers_for,
|
|
57
|
+
BACKEND_FAMILY_ALL,
|
|
58
|
+
BACKEND_FAMILY_HANDLER,
|
|
59
|
+
BACKEND_FAMILY_STORAGE_ROW,
|
|
60
|
+
BACKEND_FAMILY_STORAGE_TEXT,
|
|
61
|
+
BACKEND_FAMILY_STORAGE_VECTOR,
|
|
62
|
+
BACKEND_FAMILY_SEARCH,
|
|
63
|
+
BACKEND_FAMILY_SQL,
|
|
64
|
+
register_backend_class,
|
|
65
|
+
)
|
|
66
|
+
from .type_registry import BackendType
|
|
67
|
+
from .registry import BackendRegistry, build_backend, get_backend_builder, load_configured_backends, register_backend_builder, registered_backend_identifiers
|
|
68
|
+
from .sql import (
|
|
69
|
+
DuckDBBackend,
|
|
70
|
+
MsSQLBackend,
|
|
71
|
+
MySQLBackend,
|
|
72
|
+
OceanBaseBackend,
|
|
73
|
+
OracleBackend,
|
|
74
|
+
PostgresBackend,
|
|
75
|
+
SQLBackend,
|
|
76
|
+
SQLiteBackend,
|
|
77
|
+
StarRocksBackend,
|
|
78
|
+
TrinoBackend,
|
|
79
|
+
)
|
|
80
|
+
from .vector import ChromaBackend, LanceBackend, MilvusBackend, PgVectorBackend, PineconeBackend
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
"""Base backend protocol and query fragment primitives."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"Backend",
|
|
7
|
+
"QueryFragment",
|
|
8
|
+
"LOGICAL_OPS",
|
|
9
|
+
"HandlerContext",
|
|
10
|
+
"RowOp",
|
|
11
|
+
"ScanningBackend",
|
|
12
|
+
"safe_error",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
import threading
|
|
16
|
+
from functools import wraps
|
|
17
|
+
|
|
18
|
+
from heavenbase.utils import Any, Callable, Dict, Iterable, List, Optional, Sequence
|
|
19
|
+
|
|
20
|
+
from heavenbase.utils import dataclass, field, safe_error
|
|
21
|
+
from heavenbase.utils.config import CM_HVNB
|
|
22
|
+
|
|
23
|
+
from ..frame import ResultFrame
|
|
24
|
+
from ..schema import EntitySchema
|
|
25
|
+
from ..utils.ids import check_id, check_workspace_id
|
|
26
|
+
|
|
27
|
+
LOGICAL_OPS = ("and", "or", "not")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def backend_locked(fn: Callable[..., Any]) -> Callable[..., Any]:
    """Decorate a backend method so it runs while holding ``self._lock``.

    The wrapper enters ``self._lock`` as a context manager, calls ``fn`` with
    the same arguments, and hands back whatever ``fn`` returned. ``wraps``
    copies the wrapped method's metadata (name, docstring) onto the wrapper.
    """

    @wraps(fn)
    def _locked_call(self: Any, *args: Any, **kwargs: Any) -> Any:
        with self._lock:
            outcome = fn(self, *args, **kwargs)
        return outcome

    return _locked_call
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class RowOp:
    """A single row mutation, addressed by ``object_id``, sent to a backend.

    ``Backend.upsert`` receives a sequence of these; ``Backend.delete`` builds
    them with ``op="delete"`` and no values.
    """

    # Entity the row belongs to.
    entity_id: str
    # Identifier of the row within that entity.
    object_id: Any
    # Values carried by the mutation; every instance gets its own empty dict
    # unless one is supplied.
    values: Dict[str, Any] = field(default_factory=dict)
    # Kind of mutation; "upsert" unless the caller says otherwise.
    op: str = "upsert"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class QueryFragment:
    """One compiled operation that a backend can execute directly.

    Within this module, ``op`` is either a logical operator ("and", "or",
    "not"), "near", or another operator such as "in". ``payload`` is then a
    sequence of child fragments, a dict, or a callable taking a row. ``notes``
    carries side-channel data such as "entity_id", "row_payload", "filter"
    and "top_k".
    """

    # Name of the backend instance the fragment is addressed to.
    backend: str
    # Operator name.
    op: str
    # NOTE: this dataclass field is called ``field`` but has no default, so it
    # creates no class-level binding; the ``field(...)`` calls below therefore
    # still resolve to the dataclass helper imported at module level.
    field: str
    # Operator-specific payload (see the class docstring).
    payload: Any
    # NOTE(review): not read anywhere in this module; presumably the names of
    # the columns the fragment yields. Confirm against the handlers.
    returns: List[str] = field(default_factory=list)
    # Free-form execution hints; a fresh dict per instance by default.
    notes: Dict[str, Any] = field(default_factory=dict)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _default_near_top_k() -> int:
    """Read the configured default ``top_k`` for near queries (50 if unset)."""
    return int(CM_HVNB.get("heavenbase.query.near.default_top_k", default=50))


def _default_near_metric() -> str:
    """Read the configured default near-query metric ("cosine" if unset)."""
    return str(CM_HVNB.get("heavenbase.query.near.default_metric", default="cosine"))


@dataclass
class HandlerContext:
    """Compile-time context given to a handler.

    Identifies the backend instance, the backend type and the strategy in
    play. ``top_k`` and ``metric`` fall back to the configuration values read
    from ``CM_HVNB`` when the instance is created, not when the class is
    defined.
    """

    # Backend instance name.
    backend: str
    # Backend type identifier.
    type: str
    # Strategy name.
    strategy: str
    # Result cap for near queries; the default is looked up per instance.
    top_k: int = field(default_factory=_default_near_top_k)
    # Metric for near queries; the default is looked up per instance.
    metric: str = field(default_factory=_default_near_metric)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class Backend:
    """Base class for thin physical storage adapters.

    A backend executes already-compiled ``QueryFragment`` objects and row
    mutations; it does not parse ``QuerySpec``. ``upsert``, ``execute`` and
    ``scan`` raise ``NotImplementedError`` here and must be supplied by
    subclasses. ``get_many``, ``exists_many`` and ``count`` are generic
    fallbacks built on ``scan``.
    """

    # Backend type identifier; copied onto every instance as ``self.type``.
    identifier = "backend"
    # Reported as-is under "binary_storage" by ``capabilities()``.
    binary_storage = "bytes"
    # Near/filter planning modes this backend type supports (none by default).
    near_filter_modes_supported: Sequence[str] = ()

    def __init__(self, name: str, ws_id: str | None = None):
        """Validate the instance name and the optional workspace id.

        Args:
            name: Backend instance name, validated with ``check_id``.
            ws_id: Workspace id. A falsy value (``None`` or ``""``) leaves
                the backend unbound; anything else is validated with
                ``check_workspace_id``.
        """
        self.name = check_id(name, "backend instance name")
        self.type = type(self).identifier
        if ws_id:
            self.ws_id = check_workspace_id(ws_id)
        else:
            self.ws_id = None
        # Re-entrant, so a locked method may call another locked method.
        self._lock = threading.RLock()

    def bind_workspace(self, ws_id: str) -> "Backend":
        """Bind this backend to a validated workspace id and return ``self``.

        ``_refresh_workspace()`` runs only when the validated id differs from
        the current one. Validation and the update happen under the lock.
        """
        with self._lock:
            validated = check_workspace_id(ws_id)
            if self.ws_id != validated:
                self.ws_id = validated
                self._refresh_workspace()
            return self

    def _refresh_workspace(self) -> None:
        """Hook for refreshing workspace-derived state; a no-op here."""

    def drop(self) -> None:
        """Drop the backend's current workspace namespace; a no-op here."""

    def ensure(self, schema: EntitySchema, bindings: Sequence[Any]) -> None:
        """Prepare storage for an entity schema and its bindings; a no-op here."""

    def upsert(self, rows: Sequence[RowOp]) -> None:
        """Apply row mutations (insert, replace or delete).

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def delete(self, entity_id: str, object_ids: Iterable[Any]) -> None:
        """Delete the given object ids of one entity.

        Implemented by sending one ``RowOp`` with ``op="delete"`` per id
        through ``upsert``.
        """
        removals = [RowOp(entity_id, object_id, op="delete") for object_id in object_ids]
        self.upsert(removals)

    def get_many(self, entity_id: str, object_ids: Iterable[Any]) -> List[Optional[Dict[str, Any]]]:
        """Return the rows for ``object_ids`` in input order, ``None`` where missing.

        Portable fallback: scans the whole entity and keeps the rows whose
        "object_id" was requested. When several scanned rows share an id, the
        last one wins.
        """
        requested = list(object_ids)
        wanted = set(requested)
        found: Dict[Any, Dict[str, Any]] = {}
        for row in self.scan(entity_id):
            row_id = row.get("object_id")
            if row_id in wanted:
                found[row_id] = row
        return [found.get(row_id) for row_id in requested]

    def exists_many(self, entity_id: str, object_ids: Iterable[Any]) -> List[bool]:
        """Return one existence flag per object id, in input order."""
        looked_up = self.get_many(entity_id, object_ids)
        return [row is not None for row in looked_up]

    def count(self, entity_id: str) -> int:
        """Return the number of rows ``scan`` yields for one entity."""
        all_rows = self.scan(entity_id)
        return len(all_rows)

    def execute(self, fragment: QueryFragment) -> ResultFrame:
        """Execute a compiled query fragment and return a ``ResultFrame``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def scan(self, entity_id: str) -> List[Dict[str, Any]]:
        """Return every row of one entity as dictionaries.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    def to_dict(self) -> dict:
        """Return the backend's name and type as a plain dict."""
        return dict(name=self.name, type=self.type)

    def near_filter_modes(self) -> List[str]:
        """Return a fresh list of the supported near/filter planning modes."""
        return [*self.near_filter_modes_supported]

    def candidate_id_fragment(self, object_ids: Sequence[Any]) -> QueryFragment:
        """Build an ``object_id`` membership fragment for candidate prefiltering.

        The fragment's payload (also stored as ``notes["row_payload"]``) is a
        Python predicate over a row, so it is marked with ``mode="py_scan"``.
        """
        candidates = set(object_ids)

        def _is_candidate(row: dict) -> bool:
            return row.get("object_id") in candidates

        return QueryFragment(
            backend=self.name,
            op="in",
            field="object_id",
            payload=_is_candidate,
            notes={
                "mode": "py_scan",
                "row_payload": _is_candidate,
                "candidate_ids": list(object_ids),
            },
        )

    def capabilities(self) -> Dict[str, Any]:
        """Return the backend capabilities exposed through workspace metadata."""
        modes = self.near_filter_modes()
        report: Dict[str, Any] = {"near_filter_modes": modes}
        report["candidate_id_prefilter"] = "candidate_id_prefilter" in modes
        report["row_lookup"] = self.row_lookup_capabilities()
        report["binary_storage"] = self.binary_storage
        return report

    def row_lookup_capabilities(self) -> Dict[str, str]:
        """Return how each object-id lookup is implemented ("scan" for all here)."""
        return dict.fromkeys(("get_many", "exists_many", "count"), "scan")

    def to_mcp(
        self,
        *,
        name: Optional[str] = None,
        workspace_name: Optional[str] = None,
        description: Optional[str] = None,
        profile: Optional[str] = None,
        tools: Optional[Iterable[str]] = None,
    ) -> Any:
        """Return a native Toolkit bound to this backend instance.

        All keyword arguments are forwarded unchanged to
        ``heavenbase.mcp._backend_toolkit``.
        """
        # Imported at call time rather than at module level.
        from ..mcp import _backend_toolkit

        return _backend_toolkit(
            self,
            name=name,
            workspace_name=workspace_name,
            description=description,
            profile=profile,
            tools=tools,
        )

    def to_mcp_json(
        self,
        *,
        name: Optional[str] = None,
        workspace_name: Optional[str] = None,
        description: Optional[str] = None,
        profile: Optional[str] = None,
        tools: Optional[Iterable[str]] = None,
        transport: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
    ) -> str:
        """Return MCP client config JSON for this backend instance.

        Builds the toolkit with ``to_mcp`` and delegates to its
        ``to_mcp_json``. Only ``transport`` is passed through
        ``_mcp_transport``; ``host`` and ``port`` are forwarded as given.
        """
        from ..mcp import _mcp_transport

        toolkit = self.to_mcp(
            name=name,
            workspace_name=workspace_name,
            description=description,
            profile=profile,
            tools=tools,
        )
        return toolkit.to_mcp_json(transport=_mcp_transport(transport), host=host, port=port)

    def serve(
        self,
        *,
        name: Optional[str] = None,
        workspace_name: Optional[str] = None,
        description: Optional[str] = None,
        profile: Optional[str] = None,
        tools: Optional[Iterable[str]] = None,
        transport: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        wait: Optional[bool] = None,
        **server_kwargs: Any,
    ) -> Any:
        """Serve this backend instance as an MCP server.

        Builds the toolkit with ``to_mcp`` and delegates to its ``serve``.
        ``transport``, ``host``, ``port`` and ``wait`` each go through the
        matching ``heavenbase.mcp._mcp_*`` helper first; extra keyword
        arguments are forwarded untouched.
        """
        from ..mcp import _mcp_host, _mcp_port, _mcp_transport, _mcp_wait

        toolkit = self.to_mcp(
            name=name,
            workspace_name=workspace_name,
            description=description,
            profile=profile,
            tools=tools,
        )
        return toolkit.serve(
            transport=_mcp_transport(transport),
            host=_mcp_host(host),
            port=_mcp_port(port),
            wait=_mcp_wait(wait),
            **server_kwargs,
        )
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
class ScanningBackend(Backend):
    """Backend base that answers fragments by scanning rows in Python.

    Every fragment is evaluated against the rows returned by ``scan``, using
    callable row payloads carried by the fragment.
    """

    def _row_payload(self, fragment: QueryFragment) -> Callable[[Dict[str, Any]], Any]:
        """Return the fragment's row callable.

        ``notes["row_payload"]`` is preferred; ``fragment.payload`` is the
        fallback.

        Raises:
            TypeError: If the selected payload is not callable.
        """
        candidate = fragment.notes.get("row_payload", fragment.payload)
        if callable(candidate):
            return candidate
        raise TypeError(f"{type(self).__name__} expects a callable row payload, got {type(candidate).__name__}")

    def _row_ok(self, fragment: QueryFragment, row: Dict[str, Any]) -> bool:
        """Evaluate a (possibly nested) fragment against a single row.

        "and" / "or" combine the child fragments in ``payload``; "not" negates
        the first child; any other operator applies the row payload and
        coerces the result to ``bool``.
        """
        op = fragment.op
        if op == "and":
            return all(self._row_ok(child, row) for child in fragment.payload)
        if op == "or":
            return any(self._row_ok(child, row) for child in fragment.payload)
        if op == "not":
            negated = fragment.payload[0]
            return not self._row_ok(negated, row)
        predicate = self._row_payload(fragment)
        return bool(predicate(row))

    def _filter_fragment(self, fragment: QueryFragment) -> Any:
        """Return the filter attached to a fragment, if any.

        A non-``None`` "filter" entry in a dict payload takes precedence;
        otherwise ``notes["filter"]`` is returned (``None`` when absent).
        """
        payload = fragment.payload
        if isinstance(payload, dict):
            inline_filter = payload.get("filter")
            if inline_filter is not None:
                return inline_filter
        return fragment.notes.get("filter")

    def _top_k(self, fragment: QueryFragment) -> int:
        """Return the result cap for a near fragment.

        Lookup order: ``notes["top_k"]``, then "top_k" in a dict payload, then
        the configured ``heavenbase.query.near.default_top_k`` (50 if unset).
        The value found in notes or payload is returned without conversion.
        """
        configured = int(CM_HVNB.get("heavenbase.query.near.default_top_k", default=50))
        payload = fragment.payload
        if isinstance(payload, dict):
            fallback = payload.get("top_k", configured)
        else:
            fallback = configured
        return fragment.notes.get("top_k", fallback)

    def execute(self, fragment: QueryFragment) -> ResultFrame:
        """Execute a fragment by scanning rows with Python predicates.

        Rows come from ``scan(notes["entity_id"])``; with no entity id the row
        set is empty. Logical fragments keep the rows accepted by ``_row_ok``.
        "near" fragments apply the optional filter, score each row with the
        payload, skip rows whose score is ``None``, sort by score descending
        and keep the first ``_top_k`` rows. Any other operator keeps the rows
        for which the payload is truthy.
        """
        entity_id = fragment.notes.get("entity_id")
        rows = self.scan(entity_id) if entity_id else []
        op = fragment.op

        if op in LOGICAL_OPS:
            accepted = [row for row in rows if self._row_ok(fragment, row)]
            return ResultFrame.from_rows(accepted)

        # Resolved after the logical-op branch: logical fragments carry child
        # fragments as payload, not a callable.
        predicate = self._row_payload(fragment)
        if op != "near":
            return ResultFrame.from_rows([row for row in rows if predicate(row)])

        row_filter = self._filter_fragment(fragment)
        hits = []
        for row in rows:
            if row_filter is not None and not self._row_ok(row_filter, row):
                continue
            score = predicate(row)
            if score is None:
                continue
            # Copy the row so the scanned data is not mutated.
            hits.append(dict(row, score=score))
        hits.sort(key=lambda hit: hit.get("score", 0.0), reverse=True)
        limit = self._top_k(fragment)
        return ResultFrame.from_rows(hits[:limit])
|