lance-context 0.2.4__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lance_context-0.2.4 → lance_context-0.3.1}/Cargo.toml +1 -1
- {lance_context-0.2.4 → lance_context-0.3.1}/PKG-INFO +7 -6
- lance_context-0.3.1/crates/lance-context-api/Cargo.toml +16 -0
- lance_context-0.3.1/crates/lance-context-api/src/lib.rs +330 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/crates/lance-context-core/Cargo.toml +12 -8
- lance_context-0.3.1/crates/lance-context-core/src/api_impl.rs +180 -0
- lance_context-0.3.1/crates/lance-context-core/src/lib.rs +20 -0
- lance_context-0.3.1/crates/lance-context-core/src/record.rs +277 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/crates/lance-context-core/src/serde.rs +1 -0
- lance_context-0.3.1/crates/lance-context-core/src/store.rs +2333 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/pyproject.toml +8 -6
- {lance_context-0.2.4 → lance_context-0.3.1}/python/Cargo.toml +3 -2
- lance_context-0.3.1/python/lance_context/__init__.py +9 -0
- lance_context-0.3.1/python/lance_context/api.py +801 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/python/python/tests/test_context.py +13 -0
- lance_context-0.3.1/python/src/lib.rs +857 -0
- lance_context-0.3.1/python/tests/test_add_many.py +67 -0
- lance_context-0.3.1/python/tests/test_async.py +177 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/python/tests/test_compaction.py +8 -5
- lance_context-0.3.1/python/tests/test_delete.py +97 -0
- lance_context-0.3.1/python/tests/test_external_id.py +50 -0
- lance_context-0.3.1/python/tests/test_gcs_persistence.py +90 -0
- lance_context-0.3.1/python/tests/test_id_index.py +84 -0
- lance_context-0.3.1/python/tests/test_persistence.py +438 -0
- lance_context-0.3.1/python/tests/test_search.py +860 -0
- lance_context-0.3.1/python/tests/test_storage_options.py +176 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/python/uv.lock +669 -33
- lance_context-0.2.4/crates/lance-context/Cargo.toml +0 -12
- lance_context-0.2.4/crates/lance-context/README.md +0 -3
- lance_context-0.2.4/crates/lance-context/src/lib.rs +0 -1
- lance_context-0.2.4/crates/lance-context-core/src/lib.rs +0 -13
- lance_context-0.2.4/crates/lance-context-core/src/record.rs +0 -33
- lance_context-0.2.4/crates/lance-context-core/src/store.rs +0 -953
- lance_context-0.2.4/python/lance_context/__init__.py +0 -5
- lance_context-0.2.4/python/lance_context/api.py +0 -343
- lance_context-0.2.4/python/src/lib.rs +0 -406
- lance_context-0.2.4/python/tests/test_persistence.py +0 -220
- lance_context-0.2.4/python/tests/test_search.py +0 -292
- {lance_context-0.2.4 → lance_context-0.3.1}/LICENSE +0 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/README.md +0 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/crates/lance-context-core/README.md +0 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/crates/lance-context-core/src/context.rs +0 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/python/Cargo.lock +0 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/python/LICENSE +0 -0
- {lance_context-0.2.4 → lance_context-0.3.1}/python/README.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lance-context
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: Intended Audience :: Science/Research
|
|
6
6
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
@@ -14,15 +14,16 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Rust
|
|
16
16
|
Classifier: Topic :: Scientific/Engineering
|
|
17
|
-
Requires-Dist: pylance==
|
|
18
|
-
Requires-Dist: lancedb==0.
|
|
19
|
-
Requires-Dist: lance-namespace==0.
|
|
20
|
-
Requires-Dist: lance-graph==0.4
|
|
17
|
+
Requires-Dist: pylance==7.0.0
|
|
18
|
+
Requires-Dist: lancedb==0.27.1
|
|
19
|
+
Requires-Dist: lance-namespace==0.7.7
|
|
20
|
+
Requires-Dist: lance-graph==0.5.4
|
|
21
21
|
Requires-Dist: ruff ; extra == 'dev'
|
|
22
22
|
Requires-Dist: pyright ; extra == 'dev'
|
|
23
23
|
Requires-Dist: pytest ; extra == 'tests'
|
|
24
|
+
Requires-Dist: pytest-asyncio ; extra == 'tests'
|
|
24
25
|
Requires-Dist: ruff ; extra == 'tests'
|
|
25
|
-
Requires-Dist: moto[s3] ; extra == 'tests'
|
|
26
|
+
Requires-Dist: moto[s3,server] ; extra == 'tests'
|
|
26
27
|
Requires-Dist: boto3 ; extra == 'tests'
|
|
27
28
|
Requires-Dist: botocore ; extra == 'tests'
|
|
28
29
|
Provides-Extra: dev
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "lance-context-api"
|
|
3
|
+
version = "0.2.4"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
license = "Apache-2.0"
|
|
6
|
+
authors = ["Lance Devs <dev@lancedb.com>"]
|
|
7
|
+
repository = "https://github.com/lancedb/lance-context"
|
|
8
|
+
description = "Shared request/response types for the lance-context REST API"
|
|
9
|
+
keywords = ["context", "lance", "api"]
|
|
10
|
+
|
|
11
|
+
[dependencies]
|
|
12
|
+
base64 = "0.22"
|
|
13
|
+
chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
|
|
14
|
+
serde = { version = "1", features = ["derive"] }
|
|
15
|
+
serde_json = "1"
|
|
16
|
+
thiserror = "2"
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
|
|
2
|
+
use chrono::{DateTime, Utc};
|
|
3
|
+
use serde::{Deserialize, Serialize};
|
|
4
|
+
use serde_json::Value;
|
|
5
|
+
use std::future::Future;
|
|
6
|
+
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
// Unified error
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
|
|
11
|
+
#[derive(Debug, thiserror::Error)]
|
|
12
|
+
pub enum ContextError {
|
|
13
|
+
#[error("{0}")]
|
|
14
|
+
NotFound(String),
|
|
15
|
+
#[error("{0}")]
|
|
16
|
+
AlreadyExists(String),
|
|
17
|
+
#[error("{0}")]
|
|
18
|
+
InvalidRequest(String),
|
|
19
|
+
#[error("{0}")]
|
|
20
|
+
Internal(String),
|
|
21
|
+
#[error("Compaction already in progress")]
|
|
22
|
+
CompactionInProgress,
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/// Shorthand for a `Result` whose error type is [`ContextError`].
pub type ContextResult<T> = Result<T, ContextError>;
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Unified trait
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
pub trait ContextStoreApi {
|
|
32
|
+
fn add(
|
|
33
|
+
&mut self,
|
|
34
|
+
records: &[AddRecordRequest],
|
|
35
|
+
) -> impl Future<Output = ContextResult<AddRecordsResponse>> + Send;
|
|
36
|
+
|
|
37
|
+
fn get(&self, id: &str) -> impl Future<Output = ContextResult<Option<RecordDto>>> + Send;
|
|
38
|
+
|
|
39
|
+
fn list(
|
|
40
|
+
&self,
|
|
41
|
+
limit: Option<usize>,
|
|
42
|
+
offset: Option<usize>,
|
|
43
|
+
) -> impl Future<Output = ContextResult<Vec<RecordDto>>> + Send;
|
|
44
|
+
|
|
45
|
+
fn search(
|
|
46
|
+
&self,
|
|
47
|
+
query: &[f32],
|
|
48
|
+
limit: Option<usize>,
|
|
49
|
+
) -> impl Future<Output = ContextResult<Vec<SearchResultDto>>> + Send;
|
|
50
|
+
|
|
51
|
+
fn version(&self) -> u64;
|
|
52
|
+
|
|
53
|
+
fn checkout(&mut self, version: u64) -> impl Future<Output = ContextResult<()>> + Send;
|
|
54
|
+
|
|
55
|
+
fn compact(
|
|
56
|
+
&mut self,
|
|
57
|
+
options: Option<CompactRequest>,
|
|
58
|
+
) -> impl Future<Output = ContextResult<CompactResponse>> + Send;
|
|
59
|
+
|
|
60
|
+
fn compaction_stats(&self) -> impl Future<Output = ContextResult<CompactStatsResponse>> + Send;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Context lifecycle
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
|
|
67
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
68
|
+
pub struct CreateContextRequest {
|
|
69
|
+
pub name: String,
|
|
70
|
+
#[serde(default)]
|
|
71
|
+
pub storage_options: Option<std::collections::HashMap<String, String>>,
|
|
72
|
+
#[serde(default)]
|
|
73
|
+
pub id_index_type: Option<String>,
|
|
74
|
+
#[serde(default)]
|
|
75
|
+
pub blob_columns: Option<Vec<String>>,
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
79
|
+
pub struct ContextInfo {
|
|
80
|
+
pub name: String,
|
|
81
|
+
pub uri: String,
|
|
82
|
+
pub version: u64,
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
86
|
+
pub struct ListContextsResponse {
|
|
87
|
+
pub contexts: Vec<ContextInfo>,
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
// Records
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
93
|
+
|
|
94
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
95
|
+
pub struct StateMetadataDto {
|
|
96
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
97
|
+
pub step: Option<i32>,
|
|
98
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
99
|
+
pub active_plan_id: Option<String>,
|
|
100
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
101
|
+
pub tokens_used: Option<i32>,
|
|
102
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
103
|
+
pub custom: Option<String>,
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
|
107
|
+
pub struct AddRecordRequest {
|
|
108
|
+
#[serde(default = "default_role")]
|
|
109
|
+
pub role: String,
|
|
110
|
+
#[serde(default = "default_content_type")]
|
|
111
|
+
pub content_type: String,
|
|
112
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
113
|
+
pub text_payload: Option<String>,
|
|
114
|
+
#[serde(
|
|
115
|
+
default,
|
|
116
|
+
skip_serializing_if = "Option::is_none",
|
|
117
|
+
serialize_with = "serialize_base64_opt",
|
|
118
|
+
deserialize_with = "deserialize_base64_opt"
|
|
119
|
+
)]
|
|
120
|
+
pub binary_payload: Option<Vec<u8>>,
|
|
121
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
122
|
+
pub embedding: Option<Vec<f32>>,
|
|
123
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
124
|
+
pub bot_id: Option<String>,
|
|
125
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
126
|
+
pub session_id: Option<String>,
|
|
127
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
128
|
+
pub external_id: Option<String>,
|
|
129
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
130
|
+
pub state_metadata: Option<StateMetadataDto>,
|
|
131
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
132
|
+
pub metadata: Option<Value>,
|
|
133
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
134
|
+
pub expires_at: Option<DateTime<Utc>>,
|
|
135
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
136
|
+
pub retention_policy: Option<String>,
|
|
137
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
138
|
+
pub supersedes_id: Option<String>,
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
142
|
+
pub struct AddRecordsRequest {
|
|
143
|
+
pub records: Vec<AddRecordRequest>,
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
147
|
+
pub struct AddRecordsResponse {
|
|
148
|
+
pub version: u64,
|
|
149
|
+
pub ids: Vec<String>,
|
|
150
|
+
pub count: usize,
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
154
|
+
pub struct RecordDto {
|
|
155
|
+
pub id: String,
|
|
156
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
157
|
+
pub external_id: Option<String>,
|
|
158
|
+
pub run_id: String,
|
|
159
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
160
|
+
pub bot_id: Option<String>,
|
|
161
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
162
|
+
pub session_id: Option<String>,
|
|
163
|
+
pub created_at: DateTime<Utc>,
|
|
164
|
+
pub role: String,
|
|
165
|
+
pub content_type: String,
|
|
166
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
167
|
+
pub text_payload: Option<String>,
|
|
168
|
+
#[serde(
|
|
169
|
+
default,
|
|
170
|
+
skip_serializing_if = "Option::is_none",
|
|
171
|
+
serialize_with = "serialize_base64_opt",
|
|
172
|
+
deserialize_with = "deserialize_base64_opt"
|
|
173
|
+
)]
|
|
174
|
+
pub binary_payload: Option<Vec<u8>>,
|
|
175
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
176
|
+
pub embedding: Option<Vec<f32>>,
|
|
177
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
178
|
+
pub state_metadata: Option<StateMetadataDto>,
|
|
179
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
180
|
+
pub metadata: Option<Value>,
|
|
181
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
182
|
+
pub expires_at: Option<DateTime<Utc>>,
|
|
183
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
184
|
+
pub retention_policy: Option<String>,
|
|
185
|
+
pub lifecycle_status: String,
|
|
186
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
187
|
+
pub retired_at: Option<DateTime<Utc>>,
|
|
188
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
189
|
+
pub retired_reason: Option<String>,
|
|
190
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
191
|
+
pub supersedes_id: Option<String>,
|
|
192
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
193
|
+
pub superseded_by_id: Option<String>,
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
197
|
+
pub struct ListRecordsResponse {
|
|
198
|
+
pub records: Vec<RecordDto>,
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
// ---------------------------------------------------------------------------
|
|
202
|
+
// Single record lookup
|
|
203
|
+
// ---------------------------------------------------------------------------
|
|
204
|
+
|
|
205
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
206
|
+
pub struct GetRecordResponse {
|
|
207
|
+
pub record: Option<RecordDto>,
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
// ---------------------------------------------------------------------------
|
|
211
|
+
// Search
|
|
212
|
+
// ---------------------------------------------------------------------------
|
|
213
|
+
|
|
214
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
215
|
+
pub struct SearchRequest {
|
|
216
|
+
pub query: Vec<f32>,
|
|
217
|
+
#[serde(default = "default_search_limit")]
|
|
218
|
+
pub limit: usize,
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
222
|
+
pub struct SearchResultDto {
|
|
223
|
+
pub record: RecordDto,
|
|
224
|
+
pub distance: f32,
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
228
|
+
pub struct SearchResponse {
|
|
229
|
+
pub results: Vec<SearchResultDto>,
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
// ---------------------------------------------------------------------------
|
|
233
|
+
// Versioning
|
|
234
|
+
// ---------------------------------------------------------------------------
|
|
235
|
+
|
|
236
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
237
|
+
pub struct VersionResponse {
|
|
238
|
+
pub version: u64,
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
242
|
+
pub struct CheckoutRequest {
|
|
243
|
+
pub version: u64,
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
// Compaction
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
|
|
250
|
+
#[derive(Debug, Default, Serialize, Deserialize)]
|
|
251
|
+
pub struct CompactRequest {
|
|
252
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
253
|
+
pub target_rows_per_fragment: Option<usize>,
|
|
254
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
255
|
+
pub materialize_deletions: Option<bool>,
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
259
|
+
pub struct CompactResponse {
|
|
260
|
+
pub fragments_removed: usize,
|
|
261
|
+
pub fragments_added: usize,
|
|
262
|
+
pub files_removed: usize,
|
|
263
|
+
pub files_added: usize,
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
267
|
+
pub struct CompactStatsResponse {
|
|
268
|
+
pub total_fragments: usize,
|
|
269
|
+
pub is_compacting: bool,
|
|
270
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
271
|
+
pub last_compaction: Option<DateTime<Utc>>,
|
|
272
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
273
|
+
pub last_error: Option<String>,
|
|
274
|
+
pub total_compactions: u64,
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
// ---------------------------------------------------------------------------
|
|
278
|
+
// Error
|
|
279
|
+
// ---------------------------------------------------------------------------
|
|
280
|
+
|
|
281
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
282
|
+
pub struct ErrorBody {
|
|
283
|
+
pub code: String,
|
|
284
|
+
pub message: String,
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
#[derive(Debug, Serialize, Deserialize)]
|
|
288
|
+
pub struct ErrorResponse {
|
|
289
|
+
pub error: ErrorBody,
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
// ---------------------------------------------------------------------------
|
|
293
|
+
// Helpers
|
|
294
|
+
// ---------------------------------------------------------------------------
|
|
295
|
+
|
|
296
|
+
/// Serde default for `content_type`: the string `"text/plain"`.
fn default_content_type() -> String {
    String::from("text/plain")
}
|
|
299
|
+
|
|
300
|
+
/// Serde default for `role`: the string `"user"`.
fn default_role() -> String {
    String::from("user")
}
|
|
303
|
+
|
|
304
|
+
/// Serde default for a search `limit`: 10.
fn default_search_limit() -> usize {
    const DEFAULT_LIMIT: usize = 10;
    DEFAULT_LIMIT
}
|
|
307
|
+
|
|
308
|
+
fn serialize_base64_opt<S>(data: &Option<Vec<u8>>, serializer: S) -> Result<S::Ok, S::Error>
|
|
309
|
+
where
|
|
310
|
+
S: serde::Serializer,
|
|
311
|
+
{
|
|
312
|
+
match data {
|
|
313
|
+
Some(bytes) => serializer.serialize_some(&BASE64.encode(bytes)),
|
|
314
|
+
None => serializer.serialize_none(),
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
fn deserialize_base64_opt<'de, D>(deserializer: D) -> Result<Option<Vec<u8>>, D::Error>
|
|
319
|
+
where
|
|
320
|
+
D: serde::Deserializer<'de>,
|
|
321
|
+
{
|
|
322
|
+
let opt: Option<String> = Option::deserialize(deserializer)?;
|
|
323
|
+
match opt {
|
|
324
|
+
Some(s) => BASE64
|
|
325
|
+
.decode(&s)
|
|
326
|
+
.map(Some)
|
|
327
|
+
.map_err(serde::de::Error::custom),
|
|
328
|
+
None => Ok(None),
|
|
329
|
+
}
|
|
330
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "lance-context-core"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.3.1"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
license = "Apache-2.0"
|
|
6
6
|
authors = ["Lance Devs <dev@lancedb.com>"]
|
|
@@ -11,18 +11,22 @@ keywords = ["context", "multimodal", "lance", "agents", "storage"]
|
|
|
11
11
|
categories = ["database", "data-structures", "science"]
|
|
12
12
|
|
|
13
13
|
[dependencies]
|
|
14
|
-
arrow-array = "
|
|
15
|
-
arrow-ipc = "
|
|
16
|
-
arrow-schema = "
|
|
14
|
+
arrow-array = "58"
|
|
15
|
+
arrow-ipc = "58"
|
|
16
|
+
arrow-schema = "58"
|
|
17
17
|
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
|
18
|
-
lance = "
|
|
19
|
-
|
|
20
|
-
lance-
|
|
21
|
-
lance-
|
|
18
|
+
lance = "7.0.0"
|
|
19
|
+
lance-context-api = { path = "../lance-context-api" }
|
|
20
|
+
lance-index = "7.0.0"
|
|
21
|
+
lance-namespace = "7.0.0"
|
|
22
|
+
lancedb = "0.30.0"
|
|
23
|
+
lance-graph = "0.5.4"
|
|
22
24
|
serde = { version = "1", features = ["derive"] }
|
|
25
|
+
serde_json = "1"
|
|
23
26
|
futures = "0.3"
|
|
24
27
|
tokio = { version = "1", features = ["sync", "time"] }
|
|
25
28
|
tracing = "0.1"
|
|
29
|
+
uuid = { version = "1.20.0", features = ["v4", "v5"] }
|
|
26
30
|
|
|
27
31
|
[dev-dependencies]
|
|
28
32
|
tempfile = "3"
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
use chrono::Utc;
|
|
2
|
+
use uuid::Uuid;
|
|
3
|
+
|
|
4
|
+
use lance_context_api::{
|
|
5
|
+
AddRecordRequest, AddRecordsResponse, CompactRequest, CompactResponse, CompactStatsResponse,
|
|
6
|
+
ContextError, ContextResult, ContextStoreApi, RecordDto, SearchResultDto, StateMetadataDto,
|
|
7
|
+
};
|
|
8
|
+
|
|
9
|
+
use crate::record::{ContextRecord, StateMetadata, LIFECYCLE_ACTIVE};
|
|
10
|
+
use crate::store::{CompactionConfig, ContextStore};
|
|
11
|
+
|
|
12
|
+
impl ContextStoreApi for ContextStore {
|
|
13
|
+
async fn add(&mut self, records: &[AddRecordRequest]) -> ContextResult<AddRecordsResponse> {
|
|
14
|
+
let run_id = Uuid::new_v4().to_string();
|
|
15
|
+
let mut ids = Vec::with_capacity(records.len());
|
|
16
|
+
let mut core_records = Vec::with_capacity(records.len());
|
|
17
|
+
|
|
18
|
+
for r in records {
|
|
19
|
+
let id = Uuid::new_v4().to_string();
|
|
20
|
+
ids.push(id.clone());
|
|
21
|
+
core_records.push(ContextRecord {
|
|
22
|
+
id,
|
|
23
|
+
external_id: r.external_id.clone(),
|
|
24
|
+
run_id: run_id.clone(),
|
|
25
|
+
bot_id: r.bot_id.clone(),
|
|
26
|
+
session_id: r.session_id.clone(),
|
|
27
|
+
created_at: Utc::now(),
|
|
28
|
+
role: r.role.clone(),
|
|
29
|
+
state_metadata: r.state_metadata.as_ref().map(|sm| StateMetadata {
|
|
30
|
+
step: sm.step,
|
|
31
|
+
active_plan_id: sm.active_plan_id.clone(),
|
|
32
|
+
tokens_used: sm.tokens_used,
|
|
33
|
+
custom: sm.custom.clone(),
|
|
34
|
+
}),
|
|
35
|
+
metadata: r.metadata.clone(),
|
|
36
|
+
expires_at: r.expires_at,
|
|
37
|
+
retention_policy: r.retention_policy.clone(),
|
|
38
|
+
lifecycle_status: LIFECYCLE_ACTIVE.to_string(),
|
|
39
|
+
retired_at: None,
|
|
40
|
+
retired_reason: None,
|
|
41
|
+
supersedes_id: r.supersedes_id.clone(),
|
|
42
|
+
superseded_by_id: None,
|
|
43
|
+
content_type: r.content_type.clone(),
|
|
44
|
+
text_payload: r.text_payload.clone(),
|
|
45
|
+
binary_payload: r.binary_payload.clone(),
|
|
46
|
+
embedding: r.embedding.clone(),
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
let count = core_records.len();
|
|
51
|
+
let version = self.add(&core_records).await.map_err(to_ctx_err)?;
|
|
52
|
+
Ok(AddRecordsResponse {
|
|
53
|
+
version,
|
|
54
|
+
ids,
|
|
55
|
+
count,
|
|
56
|
+
})
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
async fn get(&self, id: &str) -> ContextResult<Option<RecordDto>> {
|
|
60
|
+
let record = ContextStore::get(self, id).await.map_err(to_ctx_err)?;
|
|
61
|
+
Ok(record.map(record_to_dto))
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
async fn list(
|
|
65
|
+
&self,
|
|
66
|
+
limit: Option<usize>,
|
|
67
|
+
offset: Option<usize>,
|
|
68
|
+
) -> ContextResult<Vec<RecordDto>> {
|
|
69
|
+
let records = ContextStore::list(self, limit, offset)
|
|
70
|
+
.await
|
|
71
|
+
.map_err(to_ctx_err)?;
|
|
72
|
+
Ok(records.into_iter().map(record_to_dto).collect())
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
async fn search(
|
|
76
|
+
&self,
|
|
77
|
+
query: &[f32],
|
|
78
|
+
limit: Option<usize>,
|
|
79
|
+
) -> ContextResult<Vec<SearchResultDto>> {
|
|
80
|
+
let results = ContextStore::search(self, query, limit)
|
|
81
|
+
.await
|
|
82
|
+
.map_err(to_ctx_err)?;
|
|
83
|
+
Ok(results
|
|
84
|
+
.into_iter()
|
|
85
|
+
.map(|sr| SearchResultDto {
|
|
86
|
+
record: record_to_dto(sr.record),
|
|
87
|
+
distance: sr.distance,
|
|
88
|
+
})
|
|
89
|
+
.collect())
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
fn version(&self) -> u64 {
|
|
93
|
+
ContextStore::version(self)
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
async fn checkout(&mut self, version: u64) -> ContextResult<()> {
|
|
97
|
+
ContextStore::checkout(self, version)
|
|
98
|
+
.await
|
|
99
|
+
.map_err(to_ctx_err)
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
async fn compact(&mut self, options: Option<CompactRequest>) -> ContextResult<CompactResponse> {
|
|
103
|
+
let config = options.map(|req| {
|
|
104
|
+
let mut c = CompactionConfig::default();
|
|
105
|
+
if let Some(v) = req.target_rows_per_fragment {
|
|
106
|
+
c.target_rows_per_fragment = v;
|
|
107
|
+
}
|
|
108
|
+
if let Some(v) = req.materialize_deletions {
|
|
109
|
+
c.materialize_deletions = v;
|
|
110
|
+
}
|
|
111
|
+
c
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
let metrics = ContextStore::compact(self, config)
|
|
115
|
+
.await
|
|
116
|
+
.map_err(to_ctx_err)?;
|
|
117
|
+
Ok(CompactResponse {
|
|
118
|
+
fragments_removed: metrics.fragments_removed,
|
|
119
|
+
fragments_added: metrics.fragments_added,
|
|
120
|
+
files_removed: metrics.files_removed,
|
|
121
|
+
files_added: metrics.files_added,
|
|
122
|
+
})
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
async fn compaction_stats(&self) -> ContextResult<CompactStatsResponse> {
|
|
126
|
+
let stats = ContextStore::compaction_stats(self)
|
|
127
|
+
.await
|
|
128
|
+
.map_err(to_ctx_err)?;
|
|
129
|
+
Ok(CompactStatsResponse {
|
|
130
|
+
total_fragments: stats.total_fragments,
|
|
131
|
+
is_compacting: stats.is_compacting,
|
|
132
|
+
last_compaction: stats.last_compaction,
|
|
133
|
+
last_error: stats.last_error,
|
|
134
|
+
total_compactions: stats.total_compactions,
|
|
135
|
+
})
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
fn record_to_dto(r: ContextRecord) -> RecordDto {
|
|
140
|
+
RecordDto {
|
|
141
|
+
id: r.id,
|
|
142
|
+
external_id: r.external_id,
|
|
143
|
+
run_id: r.run_id,
|
|
144
|
+
bot_id: r.bot_id,
|
|
145
|
+
session_id: r.session_id,
|
|
146
|
+
created_at: r.created_at,
|
|
147
|
+
role: r.role,
|
|
148
|
+
content_type: r.content_type,
|
|
149
|
+
text_payload: r.text_payload,
|
|
150
|
+
binary_payload: r.binary_payload,
|
|
151
|
+
embedding: r.embedding,
|
|
152
|
+
state_metadata: r.state_metadata.map(|sm| StateMetadataDto {
|
|
153
|
+
step: sm.step,
|
|
154
|
+
active_plan_id: sm.active_plan_id,
|
|
155
|
+
tokens_used: sm.tokens_used,
|
|
156
|
+
custom: sm.custom,
|
|
157
|
+
}),
|
|
158
|
+
metadata: r.metadata,
|
|
159
|
+
expires_at: r.expires_at,
|
|
160
|
+
retention_policy: r.retention_policy,
|
|
161
|
+
lifecycle_status: r.lifecycle_status,
|
|
162
|
+
retired_at: r.retired_at,
|
|
163
|
+
retired_reason: r.retired_reason,
|
|
164
|
+
supersedes_id: r.supersedes_id,
|
|
165
|
+
superseded_by_id: r.superseded_by_id,
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
fn to_ctx_err(err: lance::Error) -> ContextError {
|
|
170
|
+
let msg = err.to_string();
|
|
171
|
+
if msg.contains("already in progress") {
|
|
172
|
+
ContextError::CompactionInProgress
|
|
173
|
+
} else if msg.contains("not found") || msg.contains("DatasetNotFound") {
|
|
174
|
+
ContextError::NotFound(msg)
|
|
175
|
+
} else if msg.contains("Invalid") {
|
|
176
|
+
ContextError::InvalidRequest(msg)
|
|
177
|
+
} else {
|
|
178
|
+
ContextError::Internal(msg)
|
|
179
|
+
}
|
|
180
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
//! Core types for the lance-context storage layer.
|
|
2
|
+
#![recursion_limit = "256"]
|
|
3
|
+
|
|
4
|
+
mod api_impl;
|
|
5
|
+
mod context;
|
|
6
|
+
mod record;
|
|
7
|
+
pub mod serde;
|
|
8
|
+
mod store;
|
|
9
|
+
|
|
10
|
+
pub use context::{Context, ContextEntry, Snapshot};
|
|
11
|
+
pub use record::{
|
|
12
|
+
ContextRecord, LifecycleQueryOptions, MetadataFilter, RecordFilters, SearchResult,
|
|
13
|
+
StateMetadata, LIFECYCLE_ACTIVE, LIFECYCLE_CONTRADICTED,
|
|
14
|
+
};
|
|
15
|
+
pub use store::{
|
|
16
|
+
CompactionConfig, CompactionStats, ContextStore, ContextStoreOptions, IdIndexType,
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
// Re-export CompactionMetrics from lance for Python bindings
|
|
20
|
+
pub use lance::dataset::optimize::CompactionMetrics;
|