@f0rbit/corpus 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backend/cloudflare.d.ts.map +1 -1
- package/dist/backend/cloudflare.js +140 -23
- package/dist/backend/file.d.ts.map +1 -1
- package/dist/backend/file.js +47 -48
- package/dist/backend/layered.d.ts.map +1 -1
- package/dist/backend/layered.js +67 -19
- package/dist/backend/memory.d.ts +2 -1
- package/dist/backend/memory.d.ts.map +1 -1
- package/dist/backend/memory.js +29 -43
- package/dist/corpus.d.ts +11 -0
- package/dist/corpus.d.ts.map +1 -1
- package/dist/corpus.js +52 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/observations/client.d.ts +12 -0
- package/dist/observations/client.d.ts.map +1 -0
- package/dist/observations/client.js +115 -0
- package/dist/observations/index.d.ts +12 -0
- package/dist/observations/index.d.ts.map +1 -0
- package/dist/observations/index.js +11 -0
- package/dist/observations/schema.d.ts +267 -0
- package/dist/observations/schema.d.ts.map +1 -0
- package/dist/observations/schema.js +55 -0
- package/dist/observations/storage.d.ts +75 -0
- package/dist/observations/storage.d.ts.map +1 -0
- package/dist/observations/storage.js +137 -0
- package/dist/observations/types.d.ts +219 -0
- package/dist/observations/types.d.ts.map +1 -0
- package/dist/observations/types.js +40 -0
- package/dist/observations/utils.d.ts +183 -0
- package/dist/observations/utils.d.ts.map +1 -0
- package/dist/observations/utils.js +272 -0
- package/dist/sst.d.ts +1 -1
- package/dist/sst.d.ts.map +1 -1
- package/dist/sst.js +20 -0
- package/dist/types.d.ts +61 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/utils.d.ts +38 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +84 -0
- package/package.json +71 -67
- package/dist/codecs.d.ts +0 -8
- package/dist/codecs.d.ts.map +0 -1
- package/dist/codecs.js +0 -6
- package/dist/core.d.ts +0 -9
- package/dist/core.d.ts.map +0 -1
- package/dist/core.js +0 -7
package/dist/corpus.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { ok, err } from './types';
|
|
6
6
|
import { compute_hash, generate_version } from './utils';
|
|
7
|
+
import { create_pointer, resolve_path, apply_span } from './observations/utils';
|
|
7
8
|
/**
|
|
8
9
|
* Creates a typed Store instance bound to a Backend.
|
|
9
10
|
* @category Core
|
|
@@ -190,11 +191,23 @@ export function create_store(backend, definition) {
|
|
|
190
191
|
* // Type-safe access to stores
|
|
191
192
|
* await corpus.stores.users.put({ name: 'Alice', email: 'alice@example.com' })
|
|
192
193
|
* await corpus.stores.notes.put('Hello, world!')
|
|
194
|
+
*
|
|
195
|
+
* // With observations
|
|
196
|
+
* const corpus_with_obs = create_corpus()
|
|
197
|
+
* .with_backend(create_memory_backend())
|
|
198
|
+
* .with_store(users)
|
|
199
|
+
* .with_observations([EntityType, SentimentType])
|
|
200
|
+
* .build()
|
|
201
|
+
*
|
|
202
|
+
* // Pointer utilities
|
|
203
|
+
* const pointer = corpus_with_obs.create_pointer('users', 'v123', '$.name')
|
|
204
|
+
* const value = await corpus_with_obs.resolve_pointer(pointer)
|
|
193
205
|
* ```
|
|
194
206
|
*/
|
|
195
207
|
export function create_corpus() {
|
|
196
208
|
let backend = null;
|
|
197
209
|
const definitions = [];
|
|
210
|
+
let observation_types = [];
|
|
198
211
|
const builder = {
|
|
199
212
|
with_backend(b) {
|
|
200
213
|
backend = b;
|
|
@@ -204,6 +217,10 @@ export function create_corpus() {
|
|
|
204
217
|
definitions.push(definition);
|
|
205
218
|
return builder;
|
|
206
219
|
},
|
|
220
|
+
with_observations(types) {
|
|
221
|
+
observation_types = types;
|
|
222
|
+
return builder;
|
|
223
|
+
},
|
|
207
224
|
build() {
|
|
208
225
|
if (!backend) {
|
|
209
226
|
throw new Error('Backend is required. Call with_backend() first.');
|
|
@@ -213,10 +230,45 @@ export function create_corpus() {
|
|
|
213
230
|
for (const def of definitions) {
|
|
214
231
|
stores[def.id] = create_store(b, def);
|
|
215
232
|
}
|
|
233
|
+
const observations_client = observation_types.length > 0 && 'observations' in b
|
|
234
|
+
? b.observations
|
|
235
|
+
: undefined;
|
|
236
|
+
async function resolve_pointer_impl(pointer) {
|
|
237
|
+
const store = stores[pointer.store_id];
|
|
238
|
+
if (!store) {
|
|
239
|
+
return err({ kind: 'not_found', store_id: pointer.store_id, version: pointer.version });
|
|
240
|
+
}
|
|
241
|
+
const snapshot_result = await store.get(pointer.version);
|
|
242
|
+
if (!snapshot_result.ok)
|
|
243
|
+
return snapshot_result;
|
|
244
|
+
let value = snapshot_result.value.data;
|
|
245
|
+
if (pointer.path) {
|
|
246
|
+
const path_result = resolve_path(value, pointer.path);
|
|
247
|
+
if (!path_result.ok)
|
|
248
|
+
return path_result;
|
|
249
|
+
value = path_result.value;
|
|
250
|
+
}
|
|
251
|
+
if (pointer.span && typeof value === 'string') {
|
|
252
|
+
const span_result = apply_span(value, pointer.span);
|
|
253
|
+
if (!span_result.ok)
|
|
254
|
+
return span_result;
|
|
255
|
+
value = span_result.value;
|
|
256
|
+
}
|
|
257
|
+
return ok(value);
|
|
258
|
+
}
|
|
259
|
+
async function is_superseded_impl(pointer) {
|
|
260
|
+
if (!observations_client?.is_stale)
|
|
261
|
+
return false;
|
|
262
|
+
return observations_client.is_stale(pointer);
|
|
263
|
+
}
|
|
216
264
|
return {
|
|
217
265
|
stores,
|
|
218
266
|
metadata: b.metadata,
|
|
219
267
|
data: b.data,
|
|
268
|
+
observations: observations_client,
|
|
269
|
+
create_pointer,
|
|
270
|
+
resolve_pointer: resolve_pointer_impl,
|
|
271
|
+
is_superseded: is_superseded_impl,
|
|
220
272
|
};
|
|
221
273
|
},
|
|
222
274
|
};
|
package/dist/index.d.ts
CHANGED
|
@@ -5,7 +5,8 @@ export { create_cloudflare_backend, type CloudflareBackendConfig } from './backe
|
|
|
5
5
|
export { create_layered_backend, type LayeredBackendOptions } from './backend/layered';
|
|
6
6
|
export { json_codec, text_codec, binary_codec, compute_hash, generate_version } from './utils';
|
|
7
7
|
export { corpus_snapshots, type CorpusSnapshotRow, type CorpusSnapshotInsert } from './schema';
|
|
8
|
-
export type { ContentType, ParentRef, SnapshotMeta, Snapshot, DataHandle, MetadataClient, DataClient, ListOpts, Backend, Codec, Store, StoreDefinition, DefineStoreOpts, DataKeyContext, PutOpts, CorpusBuilder, Corpus, CorpusError, Result, CorpusEvent, EventHandler, } from './types';
|
|
8
|
+
export type { ContentType, ParentRef, SnapshotMeta, Snapshot, DataHandle, MetadataClient, DataClient, ListOpts, Backend, Codec, Store, StoreDefinition, DefineStoreOpts, DataKeyContext, PutOpts, CorpusBuilder, Corpus, CorpusError, Result, CorpusEvent, EventHandler, ObservationsClient, } from './types';
|
|
9
9
|
export { ok, err, define_store } from './types';
|
|
10
|
+
export * from './observations';
|
|
10
11
|
export { createCorpusInfra, CORPUS_MIGRATION_SQL, type CorpusInfra, type CorpusInfraConfig } from './sst';
|
|
11
12
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAEtD,OAAO,EAAE,qBAAqB,EAAE,KAAK,oBAAoB,EAAE,MAAM,kBAAkB,CAAA;AACnF,OAAO,EAAE,mBAAmB,EAAE,KAAK,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,yBAAyB,EAAE,KAAK,uBAAuB,EAAE,MAAM,sBAAsB,CAAA;AAC9F,OAAO,EAAE,sBAAsB,EAAE,KAAK,qBAAqB,EAAE,MAAM,mBAAmB,CAAA;AAEtF,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAE9F,OAAO,EAAE,gBAAgB,EAAE,KAAK,iBAAiB,EAAE,KAAK,oBAAoB,EAAE,MAAM,UAAU,CAAA;AAE9F,YAAY,EACV,WAAW,EACX,SAAS,EACT,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,cAAc,EACd,UAAU,EACV,QAAQ,EACR,OAAO,EACP,KAAK,EACL,KAAK,EACL,eAAe,EACf,eAAe,EACf,cAAc,EACd,OAAO,EACP,aAAa,EACb,MAAM,EACN,WAAW,EACX,MAAM,EACN,WAAW,EACX,YAAY,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAEtD,OAAO,EAAE,qBAAqB,EAAE,KAAK,oBAAoB,EAAE,MAAM,kBAAkB,CAAA;AACnF,OAAO,EAAE,mBAAmB,EAAE,KAAK,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,yBAAyB,EAAE,KAAK,uBAAuB,EAAE,MAAM,sBAAsB,CAAA;AAC9F,OAAO,EAAE,sBAAsB,EAAE,KAAK,qBAAqB,EAAE,MAAM,mBAAmB,CAAA;AAEtF,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAE9F,OAAO,EAAE,gBAAgB,EAAE,KAAK,iBAAiB,EAAE,KAAK,oBAAoB,EAAE,MAAM,UAAU,CAAA;AAE9F,YAAY,EACV,WAAW,EACX,SAAS,EACT,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,cAAc,EACd,UAAU,EACV,QAAQ,EACR,OAAO,EACP,KAAK,EACL,KAAK,EACL,eAAe,EACf,eAAe,EACf,cAAc,EACd,OAAO,EACP,aAAa,EACb,MAAM,EACN,WAAW,EACX,MAAM,EACN,WAAW,EACX,YAAY,EACZ,kBAAkB,GACnB,MAAM,SAAS,CAAA;AAEhB,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAE/C,cAAc,gBAAgB,CAAA;AAE9B,OAAO,EAAE,iBAAiB,EAAE,oBAAoB,EAAE,KAAK,WAAW,EAAE,KAAK,iBAAiB,EAAE,MAAM,OAAO,CAAA"}
|
package/dist/index.js
CHANGED
|
@@ -6,4 +6,5 @@ export { create_layered_backend } from './backend/layered';
|
|
|
6
6
|
export { json_codec, text_codec, binary_codec, compute_hash, generate_version } from './utils';
|
|
7
7
|
export { corpus_snapshots } from './schema';
|
|
8
8
|
export { ok, err, define_store } from './types';
|
|
9
|
+
export * from './observations';
|
|
9
10
|
export { createCorpusInfra, CORPUS_MIGRATION_SQL } from './sst';
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module ObservationsClient
|
|
3
|
+
* @description Centralized business logic for observations, built on storage adapters.
|
|
4
|
+
*/
|
|
5
|
+
import type { MetadataClient, ObservationsClient } from '../types';
|
|
6
|
+
import type { ObservationsStorage } from './storage';
|
|
7
|
+
/**
|
|
8
|
+
* Creates an ObservationsClient from a storage adapter.
|
|
9
|
+
* All business logic (validation, staleness, etc.) is centralized here.
|
|
10
|
+
*/
|
|
11
|
+
export declare function create_observations_client(storage: ObservationsStorage, metadata: MetadataClient): ObservationsClient;
|
|
12
|
+
//# sourceMappingURL=client.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../observations/client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAuB,cAAc,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA;AAEvF,OAAO,KAAK,EAAE,mBAAmB,EAAoB,MAAM,WAAW,CAAA;AAuBtE;;;GAGG;AACH,wBAAgB,0BAA0B,CACxC,OAAO,EAAE,mBAAmB,EAC5B,QAAQ,EAAE,cAAc,GACvB,kBAAkB,CAqGpB"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module ObservationsClient
|
|
3
|
+
* @description Centralized business logic for observations, built on storage adapters.
|
|
4
|
+
*/
|
|
5
|
+
import { row_to_observation, row_to_meta, create_observation_row } from './storage';
|
|
6
|
+
import { generate_observation_id } from './utils';
|
|
7
|
+
import { ok, err } from '../types';
|
|
8
|
+
/**
 * Convert client query opts to storage query opts.
 *
 * Renames `source_store` to `source_store_id` and `after` / `before` to
 * `observed_after` / `observed_before`, and turns every Date bound into an
 * ISO string via `toISOString()`. Bounds that are not set stay `undefined`.
 *
 * @param opts - Client-level query options.
 * @returns The equivalent storage-level query options.
 */
function to_storage_opts(opts) {
    return {
        type: opts.type,
        source_store_id: opts.source_store,
        source_version: opts.source_version,
        source_prefix: opts.source_prefix,
        created_after: opts.created_after?.toISOString(),
        created_before: opts.created_before?.toISOString(),
        observed_after: opts.after?.toISOString(),
        observed_before: opts.before?.toISOString(),
        limit: opts.limit
    };
}
|
|
25
|
+
/**
|
|
26
|
+
* Creates an ObservationsClient from a storage adapter.
|
|
27
|
+
* All business logic (validation, staleness, etc.) is centralized here.
|
|
28
|
+
*/
|
|
29
|
+
export function create_observations_client(storage, metadata) {
|
|
30
|
+
async function get_latest_version(store_id) {
|
|
31
|
+
const result = await metadata.get_latest(store_id);
|
|
32
|
+
return result.ok ? result.value.version : null;
|
|
33
|
+
}
|
|
34
|
+
return {
|
|
35
|
+
async put(type, opts) {
|
|
36
|
+
const validation = type.schema.safeParse(opts.content);
|
|
37
|
+
if (!validation.success) {
|
|
38
|
+
return err({
|
|
39
|
+
kind: 'validation_error',
|
|
40
|
+
cause: validation.error,
|
|
41
|
+
message: validation.error.message
|
|
42
|
+
});
|
|
43
|
+
}
|
|
44
|
+
const id = generate_observation_id();
|
|
45
|
+
const row = create_observation_row(id, type.name, opts.source, validation.data, {
|
|
46
|
+
confidence: opts.confidence,
|
|
47
|
+
observed_at: opts.observed_at,
|
|
48
|
+
derived_from: opts.derived_from
|
|
49
|
+
});
|
|
50
|
+
const result = await storage.put_row(row);
|
|
51
|
+
if (!result.ok)
|
|
52
|
+
return result;
|
|
53
|
+
const observation = {
|
|
54
|
+
id,
|
|
55
|
+
type: type.name,
|
|
56
|
+
source: opts.source,
|
|
57
|
+
content: validation.data,
|
|
58
|
+
...(opts.confidence !== undefined && { confidence: opts.confidence }),
|
|
59
|
+
...(opts.observed_at && { observed_at: opts.observed_at }),
|
|
60
|
+
created_at: new Date(row.created_at),
|
|
61
|
+
...(opts.derived_from && { derived_from: opts.derived_from })
|
|
62
|
+
};
|
|
63
|
+
return ok(observation);
|
|
64
|
+
},
|
|
65
|
+
async get(id) {
|
|
66
|
+
const result = await storage.get_row(id);
|
|
67
|
+
if (!result.ok)
|
|
68
|
+
return result;
|
|
69
|
+
if (!result.value) {
|
|
70
|
+
return err({ kind: 'observation_not_found', id });
|
|
71
|
+
}
|
|
72
|
+
return ok(row_to_observation(result.value));
|
|
73
|
+
},
|
|
74
|
+
async *query(opts = {}) {
|
|
75
|
+
const storageOpts = to_storage_opts(opts);
|
|
76
|
+
for await (const row of storage.query_rows(storageOpts)) {
|
|
77
|
+
if (!opts.include_stale) {
|
|
78
|
+
const latest = await get_latest_version(row.source_store_id);
|
|
79
|
+
if (latest && row.source_version !== latest)
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
yield row_to_observation(row);
|
|
83
|
+
}
|
|
84
|
+
},
|
|
85
|
+
async *query_meta(opts = {}) {
|
|
86
|
+
const storageOpts = to_storage_opts(opts);
|
|
87
|
+
for await (const row of storage.query_rows(storageOpts)) {
|
|
88
|
+
if (!opts.include_stale) {
|
|
89
|
+
const latest = await get_latest_version(row.source_store_id);
|
|
90
|
+
if (latest && row.source_version !== latest)
|
|
91
|
+
continue;
|
|
92
|
+
}
|
|
93
|
+
yield row_to_meta(row);
|
|
94
|
+
}
|
|
95
|
+
},
|
|
96
|
+
async delete(id) {
|
|
97
|
+
const result = await storage.delete_row(id);
|
|
98
|
+
if (!result.ok)
|
|
99
|
+
return result;
|
|
100
|
+
if (!result.value) {
|
|
101
|
+
return err({ kind: 'observation_not_found', id });
|
|
102
|
+
}
|
|
103
|
+
return ok(undefined);
|
|
104
|
+
},
|
|
105
|
+
async delete_by_source(source) {
|
|
106
|
+
return storage.delete_by_source(source.store_id, source.version, source.path);
|
|
107
|
+
},
|
|
108
|
+
async is_stale(pointer) {
|
|
109
|
+
const latest = await get_latest_version(pointer.store_id);
|
|
110
|
+
if (!latest)
|
|
111
|
+
return false;
|
|
112
|
+
return pointer.version !== latest;
|
|
113
|
+
}
|
|
114
|
+
};
|
|
115
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module Observations
|
|
3
|
+
* @description Re-exports for the observations feature.
|
|
4
|
+
*/
|
|
5
|
+
export * from './types';
|
|
6
|
+
export type { ObservationRow, ObservationInsert } from './schema';
|
|
7
|
+
export { corpus_observations } from './schema';
|
|
8
|
+
export type { ObservationsStorage, StorageQueryOpts, ObservationsCRUD } from './storage';
|
|
9
|
+
export { row_to_observation, row_to_meta, create_observation_row, filter_observation_rows, create_observations_storage } from './storage';
|
|
10
|
+
export { create_observations_client } from './client';
|
|
11
|
+
export * from './utils';
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../observations/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,cAAc,SAAS,CAAA;AACvB,YAAY,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAA;AACjE,OAAO,EAAE,mBAAmB,EAAE,MAAM,UAAU,CAAA;AAC9C,YAAY,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAA;AAGxF,OAAO,EAAE,kBAAkB,EAAE,WAAW,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,2BAA2B,EAAE,MAAM,WAAW,CAAA;AACzI,OAAO,EAAE,0BAA0B,EAAE,MAAM,UAAU,CAAA;AACrD,cAAc,SAAS,CAAA"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module Observations
|
|
3
|
+
* @description Re-exports for the observations feature.
|
|
4
|
+
*/
|
|
5
|
+
// Types
|
|
6
|
+
export * from './types';
|
|
7
|
+
export { corpus_observations } from './schema';
|
|
8
|
+
// Functions
|
|
9
|
+
export { row_to_observation, row_to_meta, create_observation_row, filter_observation_rows, create_observations_storage } from './storage';
|
|
10
|
+
export { create_observations_client } from './client';
|
|
11
|
+
export * from './utils';
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module ObservationSchema
|
|
3
|
+
* @description Database schema definitions for observations using Drizzle ORM.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Drizzle ORM schema for the corpus_observations table.
 *
 * Used by the Cloudflare backend with D1 (SQLite). Defines the table structure
 * for storing observation metadata linking structured facts to versioned content.
 *
 * Columns:
 * - `id` - Primary key (unique observation identifier)
 * - `type` - Observation type name (e.g., 'entity_mention')
 * - `source_store_id` + `source_version` - Points to the snapshot this observation is about
 * - `source_path` - Optional JSONPath expression to specific element
 * - `source_span_start` / `source_span_end` - Optional character range within text
 * - `content` - JSON-encoded observation data
 * - `confidence` - Optional confidence score (0.0 to 1.0)
 * - `observed_at` - When the observation was made (ISO 8601)
 * - `created_at` - When the record was stored (ISO 8601)
 * - `derived_from` - Optional JSON array of SnapshotPointers for provenance
 *
 * @example
 * ```ts
 * import { drizzle } from 'drizzle-orm/d1'
 * import { eq } from 'drizzle-orm'
 * import { corpus_observations } from '@f0rbit/corpus'
 *
 * const db = drizzle(env.D1)
 * const rows = await db.select().from(corpus_observations).where(eq(corpus_observations.type, 'entity_mention'))
 * ```
 */
export declare const corpus_observations: import("drizzle-orm/sqlite-core").SQLiteTableWithColumns<{
    name: "corpus_observations";
    schema: undefined;
    columns: {
        id: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "id";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: true;
            hasDefault: false;
            isPrimaryKey: true;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        type: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "type";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: true;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        source_store_id: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "source_store_id";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: true;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        source_version: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "source_version";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: true;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        source_path: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "source_path";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: false;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        source_span_start: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "source_span_start";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: false;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        source_span_end: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "source_span_end";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: false;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        content: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "content";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: true;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        confidence: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "confidence";
            tableName: "corpus_observations";
            dataType: "number";
            columnType: "SQLiteReal";
            data: number;
            driverParam: number;
            notNull: false;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: undefined;
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {}>;
        observed_at: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "observed_at";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: false;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        created_at: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "created_at";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: true;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
        derived_from: import("drizzle-orm/sqlite-core").SQLiteColumn<{
            name: "derived_from";
            tableName: "corpus_observations";
            dataType: "string";
            columnType: "SQLiteText";
            data: string;
            driverParam: string;
            notNull: false;
            hasDefault: false;
            isPrimaryKey: false;
            isAutoincrement: false;
            hasRuntimeDefault: false;
            enumValues: [string, ...string[]];
            baseColumn: never;
            identity: undefined;
            generated: undefined;
        }, {}, {
            length: number | undefined;
        }>;
    };
    dialect: "sqlite";
}>;
|
|
265
|
+
export type ObservationRow = typeof corpus_observations.$inferSelect;
|
|
266
|
+
export type ObservationInsert = typeof corpus_observations.$inferInsert;
|
|
267
|
+
//# sourceMappingURL=schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../observations/schema.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EA0B7B,CAAA;AAEH,MAAM,MAAM,cAAc,GAAG,OAAO,mBAAmB,CAAC,YAAY,CAAA;AACpE,MAAM,MAAM,iBAAiB,GAAG,OAAO,mBAAmB,CAAC,YAAY,CAAA"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module ObservationSchema
|
|
3
|
+
* @description Database schema definitions for observations using Drizzle ORM.
|
|
4
|
+
*/
|
|
5
|
+
import { sqliteTable, text, real, index } from 'drizzle-orm/sqlite-core';
|
|
6
|
+
/**
 * Drizzle ORM schema for the corpus_observations table.
 *
 * Used by the Cloudflare backend with D1 (SQLite). Defines the table structure
 * for storing observation metadata linking structured facts to versioned content.
 *
 * Columns:
 * - `id` - Primary key (unique observation identifier)
 * - `type` - Observation type name (e.g., 'entity_mention')
 * - `source_store_id` + `source_version` - Points to the snapshot this observation is about
 * - `source_path` - Optional JSONPath expression to specific element
 * - `source_span_start` / `source_span_end` - Optional character range within text
 * - `content` - JSON-encoded observation data
 * - `confidence` - Optional confidence score (0.0 to 1.0)
 * - `observed_at` - When the observation was made (ISO 8601)
 * - `created_at` - When the record was stored (ISO 8601)
 * - `derived_from` - Optional JSON array of SnapshotPointers for provenance
 *
 * Indexes: by `type`, by source (`source_store_id`, `source_version`),
 * by (`type`, `observed_at`) and by (`type`, `source_store_id`).
 *
 * @example
 * ```ts
 * import { drizzle } from 'drizzle-orm/d1'
 * import { eq } from 'drizzle-orm'
 * import { corpus_observations } from '@f0rbit/corpus'
 *
 * const db = drizzle(env.D1)
 * const rows = await db.select().from(corpus_observations).where(eq(corpus_observations.type, 'entity_mention'))
 * ```
 */
export const corpus_observations = sqliteTable('corpus_observations', {
    id: text('id').primaryKey(),
    type: text('type').notNull(),
    // Source pointer (span bounds are declared as text columns)
    source_store_id: text('source_store_id').notNull(),
    source_version: text('source_version').notNull(),
    source_path: text('source_path'),
    source_span_start: text('source_span_start'),
    source_span_end: text('source_span_end'),
    // Content
    content: text('content').notNull(),
    confidence: real('confidence'),
    // Timestamps
    observed_at: text('observed_at'),
    created_at: text('created_at').notNull(),
    // Derivation lineage
    derived_from: text('derived_from'),
}, (table) => ({
    type_idx: index('idx_obs_type').on(table.type),
    source_idx: index('idx_obs_source').on(table.source_store_id, table.source_version),
    type_observed_idx: index('idx_obs_type_observed').on(table.type, table.observed_at),
    type_source_idx: index('idx_obs_type_source').on(table.type, table.source_store_id),
}));
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module ObservationsStorage
|
|
3
|
+
* @description Raw storage interface and row conversion utilities for observations.
|
|
4
|
+
*/
|
|
5
|
+
import type { Result, CorpusError } from '../types';
|
|
6
|
+
import type { ObservationRow } from './schema';
|
|
7
|
+
import type { Observation, ObservationMeta, SnapshotPointer } from './types';
|
|
8
|
+
/**
 * Query options for raw storage operations.
 * Dates are ISO strings at the storage layer.
 * Every field is optional; an empty object applies no filter.
 */
export type StorageQueryOpts = {
    /** One observation type name, or a list of them. */
    type?: string | string[];
    /** Store id of the source snapshot. */
    source_store_id?: string;
    /** Version of the source snapshot. */
    source_version?: string;
    source_prefix?: string;
    /** Bounds on `created_at`, as ISO strings. */
    created_after?: string;
    created_before?: string;
    /** Bounds on `observed_at`, as ISO strings. */
    observed_after?: string;
    observed_before?: string;
    limit?: number;
};
|
|
23
|
+
/**
 * Raw storage adapter for observation rows.
 * Backends implement this thin interface; all business logic lives in the client.
 */
export type ObservationsStorage = {
    /** Store a row. Returns the row on success. */
    put_row: (row: ObservationRow) => Promise<Result<ObservationRow, CorpusError>>;
    /** Get a row by ID. Returns null if not found. */
    get_row: (id: string) => Promise<Result<ObservationRow | null, CorpusError>>;
    /** Query rows with optional filters. */
    query_rows: (opts?: StorageQueryOpts) => AsyncIterable<ObservationRow>;
    /** Delete a row by ID. Returns true if deleted, false if not found. */
    delete_row: (id: string) => Promise<Result<boolean, CorpusError>>;
    /** Delete rows matching source. Returns count deleted. */
    delete_by_source: (store_id: string, version: string, path?: string) => Promise<Result<number, CorpusError>>;
};
|
|
39
|
+
/**
|
|
40
|
+
* Convert a storage row to an Observation (includes content).
|
|
41
|
+
*/
|
|
42
|
+
export declare function row_to_observation(row: ObservationRow): Observation;
|
|
43
|
+
/**
|
|
44
|
+
* Convert a storage row to ObservationMeta (excludes content).
|
|
45
|
+
*/
|
|
46
|
+
export declare function row_to_meta(row: ObservationRow): ObservationMeta;
|
|
47
|
+
/**
 * Create an ObservationRow from put options.
 *
 * @param id - Identifier for the new row.
 * @param type_name - Observation type name.
 * @param source - Pointer to the snapshot the observation is about.
 * @param content - Observation content.
 * @param opts - Optional confidence, observation time and derivation pointers.
 */
export declare function create_observation_row(id: string, type_name: string, source: SnapshotPointer, content: unknown, opts: {
    confidence?: number;
    observed_at?: Date;
    derived_from?: SnapshotPointer[];
}): ObservationRow;
|
|
55
|
+
/**
|
|
56
|
+
* Filter and sort observation rows based on query options.
|
|
57
|
+
* Used by in-memory storage implementations (memory backend, file backend).
|
|
58
|
+
*/
|
|
59
|
+
export declare function filter_observation_rows(rows: ObservationRow[], opts?: StorageQueryOpts): ObservationRow[];
|
|
60
|
+
/**
 * Simple CRUD interface for observation storage backends.
 */
export type ObservationsCRUD = {
    get_all: () => Promise<ObservationRow[]>;
    set_all: (rows: ObservationRow[]) => Promise<void>;
    get_one: (id: string) => Promise<ObservationRow | null>;
    add_one: (row: ObservationRow) => Promise<void>;
    remove_one: (id: string) => Promise<boolean>;
};
|
|
70
|
+
/**
|
|
71
|
+
* Create an ObservationsStorage from simple CRUD operations.
|
|
72
|
+
* Used by memory and file backends.
|
|
73
|
+
*/
|
|
74
|
+
export declare function create_observations_storage(crud: ObservationsCRUD): ObservationsStorage;
|
|
75
|
+
//# sourceMappingURL=storage.d.ts.map
|