@f0rbit/corpus 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/dist/backend/cloudflare.d.ts.map +1 -1
  2. package/dist/backend/cloudflare.js +140 -23
  3. package/dist/backend/file.d.ts.map +1 -1
  4. package/dist/backend/file.js +47 -48
  5. package/dist/backend/layered.d.ts.map +1 -1
  6. package/dist/backend/layered.js +67 -19
  7. package/dist/backend/memory.d.ts +2 -1
  8. package/dist/backend/memory.d.ts.map +1 -1
  9. package/dist/backend/memory.js +29 -43
  10. package/dist/corpus.d.ts +11 -0
  11. package/dist/corpus.d.ts.map +1 -1
  12. package/dist/corpus.js +52 -0
  13. package/dist/index.d.ts +2 -1
  14. package/dist/index.d.ts.map +1 -1
  15. package/dist/index.js +1 -0
  16. package/dist/observations/client.d.ts +12 -0
  17. package/dist/observations/client.d.ts.map +1 -0
  18. package/dist/observations/client.js +115 -0
  19. package/dist/observations/index.d.ts +12 -0
  20. package/dist/observations/index.d.ts.map +1 -0
  21. package/dist/observations/index.js +11 -0
  22. package/dist/observations/schema.d.ts +267 -0
  23. package/dist/observations/schema.d.ts.map +1 -0
  24. package/dist/observations/schema.js +55 -0
  25. package/dist/observations/storage.d.ts +75 -0
  26. package/dist/observations/storage.d.ts.map +1 -0
  27. package/dist/observations/storage.js +137 -0
  28. package/dist/observations/types.d.ts +219 -0
  29. package/dist/observations/types.d.ts.map +1 -0
  30. package/dist/observations/types.js +40 -0
  31. package/dist/observations/utils.d.ts +183 -0
  32. package/dist/observations/utils.d.ts.map +1 -0
  33. package/dist/observations/utils.js +272 -0
  34. package/dist/sst.d.ts +1 -1
  35. package/dist/sst.d.ts.map +1 -1
  36. package/dist/sst.js +20 -0
  37. package/dist/types.d.ts +61 -0
  38. package/dist/types.d.ts.map +1 -1
  39. package/dist/utils.d.ts +38 -1
  40. package/dist/utils.d.ts.map +1 -1
  41. package/dist/utils.js +84 -0
  42. package/package.json +1 -1
  43. package/dist/codecs.d.ts +0 -8
  44. package/dist/codecs.d.ts.map +0 -1
  45. package/dist/codecs.js +0 -6
  46. package/dist/core.d.ts +0 -9
  47. package/dist/core.d.ts.map +0 -1
  48. package/dist/core.js +0 -7
package/dist/corpus.js CHANGED
@@ -4,6 +4,7 @@
4
4
  */
5
5
  import { ok, err } from './types';
6
6
  import { compute_hash, generate_version } from './utils';
7
+ import { create_pointer, resolve_path, apply_span } from './observations/utils';
7
8
  /**
8
9
  * Creates a typed Store instance bound to a Backend.
9
10
  * @category Core
@@ -190,11 +191,23 @@ export function create_store(backend, definition) {
190
191
  * // Type-safe access to stores
191
192
  * await corpus.stores.users.put({ name: 'Alice', email: 'alice@example.com' })
192
193
  * await corpus.stores.notes.put('Hello, world!')
194
+ *
195
+ * // With observations
196
+ * const corpus_with_obs = create_corpus()
197
+ * .with_backend(create_memory_backend())
198
+ * .with_store(users)
199
+ * .with_observations([EntityType, SentimentType])
200
+ * .build()
201
+ *
202
+ * // Pointer utilities
203
+ * const pointer = corpus_with_obs.create_pointer('users', 'v123', '$.name')
204
+ * const value = await corpus_with_obs.resolve_pointer(pointer)
193
205
  * ```
194
206
  */
195
207
  export function create_corpus() {
196
208
  let backend = null;
197
209
  const definitions = [];
210
+ let observation_types = [];
198
211
  const builder = {
199
212
  with_backend(b) {
200
213
  backend = b;
@@ -204,6 +217,10 @@ export function create_corpus() {
204
217
  definitions.push(definition);
205
218
  return builder;
206
219
  },
220
+ with_observations(types) {
221
+ observation_types = types;
222
+ return builder;
223
+ },
207
224
  build() {
208
225
  if (!backend) {
209
226
  throw new Error('Backend is required. Call with_backend() first.');
@@ -213,10 +230,45 @@ export function create_corpus() {
213
230
  for (const def of definitions) {
214
231
  stores[def.id] = create_store(b, def);
215
232
  }
233
+ const observations_client = observation_types.length > 0 && 'observations' in b
234
+ ? b.observations
235
+ : undefined;
236
+ async function resolve_pointer_impl(pointer) {
237
+ const store = stores[pointer.store_id];
238
+ if (!store) {
239
+ return err({ kind: 'not_found', store_id: pointer.store_id, version: pointer.version });
240
+ }
241
+ const snapshot_result = await store.get(pointer.version);
242
+ if (!snapshot_result.ok)
243
+ return snapshot_result;
244
+ let value = snapshot_result.value.data;
245
+ if (pointer.path) {
246
+ const path_result = resolve_path(value, pointer.path);
247
+ if (!path_result.ok)
248
+ return path_result;
249
+ value = path_result.value;
250
+ }
251
+ if (pointer.span && typeof value === 'string') {
252
+ const span_result = apply_span(value, pointer.span);
253
+ if (!span_result.ok)
254
+ return span_result;
255
+ value = span_result.value;
256
+ }
257
+ return ok(value);
258
+ }
259
+ async function is_superseded_impl(pointer) {
260
+ if (!observations_client?.is_stale)
261
+ return false;
262
+ return observations_client.is_stale(pointer);
263
+ }
216
264
  return {
217
265
  stores,
218
266
  metadata: b.metadata,
219
267
  data: b.data,
268
+ observations: observations_client,
269
+ create_pointer,
270
+ resolve_pointer: resolve_pointer_impl,
271
+ is_superseded: is_superseded_impl,
220
272
  };
221
273
  },
222
274
  };
package/dist/index.d.ts CHANGED
@@ -5,7 +5,8 @@ export { create_cloudflare_backend, type CloudflareBackendConfig } from './backe
5
5
  export { create_layered_backend, type LayeredBackendOptions } from './backend/layered';
6
6
  export { json_codec, text_codec, binary_codec, compute_hash, generate_version } from './utils';
7
7
  export { corpus_snapshots, type CorpusSnapshotRow, type CorpusSnapshotInsert } from './schema';
8
- export type { ContentType, ParentRef, SnapshotMeta, Snapshot, DataHandle, MetadataClient, DataClient, ListOpts, Backend, Codec, Store, StoreDefinition, DefineStoreOpts, DataKeyContext, PutOpts, CorpusBuilder, Corpus, CorpusError, Result, CorpusEvent, EventHandler, } from './types';
8
+ export type { ContentType, ParentRef, SnapshotMeta, Snapshot, DataHandle, MetadataClient, DataClient, ListOpts, Backend, Codec, Store, StoreDefinition, DefineStoreOpts, DataKeyContext, PutOpts, CorpusBuilder, Corpus, CorpusError, Result, CorpusEvent, EventHandler, ObservationsClient, } from './types';
9
9
  export { ok, err, define_store } from './types';
10
+ export * from './observations';
10
11
  export { createCorpusInfra, CORPUS_MIGRATION_SQL, type CorpusInfra, type CorpusInfraConfig } from './sst';
11
12
  //# sourceMappingURL=index.d.ts.map
package/dist/index.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAEtD,OAAO,EAAE,qBAAqB,EAAE,KAAK,oBAAoB,EAAE,MAAM,kBAAkB,CAAA;AACnF,OAAO,EAAE,mBAAmB,EAAE,KAAK,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,yBAAyB,EAAE,KAAK,uBAAuB,EAAE,MAAM,sBAAsB,CAAA;AAC9F,OAAO,EAAE,sBAAsB,EAAE,KAAK,qBAAqB,EAAE,MAAM,mBAAmB,CAAA;AAEtF,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAE9F,OAAO,EAAE,gBAAgB,EAAE,KAAK,iBAAiB,EAAE,KAAK,oBAAoB,EAAE,MAAM,UAAU,CAAA;AAE9F,YAAY,EACV,WAAW,EACX,SAAS,EACT,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,cAAc,EACd,UAAU,EACV,QAAQ,EACR,OAAO,EACP,KAAK,EACL,KAAK,EACL,eAAe,EACf,eAAe,EACf,cAAc,EACd,OAAO,EACP,aAAa,EACb,MAAM,EACN,WAAW,EACX,MAAM,EACN,WAAW,EACX,YAAY,GACb,MAAM,SAAS,CAAA;AAEhB,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAE/C,OAAO,EAAE,iBAAiB,EAAE,oBAAoB,EAAE,KAAK,WAAW,EAAE,KAAK,iBAAiB,EAAE,MAAM,OAAO,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,UAAU,CAAA;AAEtD,OAAO,EAAE,qBAAqB,EAAE,KAAK,oBAAoB,EAAE,MAAM,kBAAkB,CAAA;AACnF,OAAO,EAAE,mBAAmB,EAAE,KAAK,iBAAiB,EAAE,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,yBAAyB,EAAE,KAAK,uBAAuB,EAAE,MAAM,sBAAsB,CAAA;AAC9F,OAAO,EAAE,sBAAsB,EAAE,KAAK,qBAAqB,EAAE,MAAM,mBAAmB,CAAA;AAEtF,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAE9F,OAAO,EAAE,gBAAgB,EAAE,KAAK,iBAAiB,EAAE,KAAK,oBAAoB,EAAE,MAAM,UAAU,CAAA;AAE9F,YAAY,EACV,WAAW,EACX,SAAS,EACT,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,cAAc,EACd,UAAU,EACV,QAAQ,EACR,OAAO,EACP,KAAK,EACL,KAAK,EACL,eAAe,EACf,eAAe,EACf,cAAc,EACd,OAAO,EACP,aAAa,EACb,MAAM,EACN,WAAW,EACX,MAAM,EACN,WAAW,EACX,YAAY,EACZ,kBAAkB,GACnB,MAAM,SAAS,CAAA;AAEhB,OAAO,EAAE,EAAE,EAAE,GAAG,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAE/C,cAAc,gBAAgB,CAAA;AAE9B,OAAO,EAAE,iBAAiB,EAAE,oBAAoB,EAAE,KAAK,WAAW,EAAE,KAAK,iBAAiB,EAAE,MAAM,OAAO,CAAA"}
package/dist/index.js CHANGED
@@ -6,4 +6,5 @@ export { create_layered_backend } from './backend/layered';
6
6
  export { json_codec, text_codec, binary_codec, compute_hash, generate_version } from './utils';
7
7
  export { corpus_snapshots } from './schema';
8
8
  export { ok, err, define_store } from './types';
9
+ export * from './observations';
9
10
  export { createCorpusInfra, CORPUS_MIGRATION_SQL } from './sst';
package/dist/observations/client.d.ts ADDED
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @module ObservationsClient
3
+ * @description Centralized business logic for observations, built on storage adapters.
4
+ */
5
+ import type { MetadataClient, ObservationsClient } from '../types';
6
+ import type { ObservationsStorage } from './storage';
7
+ /**
8
+ * Creates an ObservationsClient from a storage adapter.
9
+ * All business logic (validation, staleness, etc.) is centralized here.
10
+ */
11
+ export declare function create_observations_client(storage: ObservationsStorage, metadata: MetadataClient): ObservationsClient;
12
+ //# sourceMappingURL=client.d.ts.map
package/dist/observations/client.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../observations/client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAuB,cAAc,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAA;AAEvF,OAAO,KAAK,EAAE,mBAAmB,EAAoB,MAAM,WAAW,CAAA;AAuBtE;;;GAGG;AACH,wBAAgB,0BAA0B,CACxC,OAAO,EAAE,mBAAmB,EAC5B,QAAQ,EAAE,cAAc,GACvB,kBAAkB,CAqGpB"}
package/dist/observations/client.js ADDED
@@ -0,0 +1,115 @@
1
+ /**
2
+ * @module ObservationsClient
3
+ * @description Centralized business logic for observations, built on storage adapters.
4
+ */
5
+ import { row_to_observation, row_to_meta, create_observation_row } from './storage';
6
+ import { generate_observation_id } from './utils';
7
+ import { ok, err } from '../types';
8
+ /**
9
+ * Convert client query opts to storage query opts.
10
+ * Handles Date -> ISO string conversion.
11
+ */
12
+ function to_storage_opts(opts) {
13
+ return {
14
+ type: opts.type,
15
+ source_store_id: opts.source_store,
16
+ source_version: opts.source_version,
17
+ source_prefix: opts.source_prefix,
18
+ created_after: opts.created_after?.toISOString(),
19
+ created_before: opts.created_before?.toISOString(),
20
+ observed_after: opts.after?.toISOString(),
21
+ observed_before: opts.before?.toISOString(),
22
+ limit: opts.limit
23
+ };
24
+ }
25
+ /**
26
+ * Creates an ObservationsClient from a storage adapter.
27
+ * All business logic (validation, staleness, etc.) is centralized here.
28
+ */
29
+ export function create_observations_client(storage, metadata) {
30
+ async function get_latest_version(store_id) {
31
+ const result = await metadata.get_latest(store_id);
32
+ return result.ok ? result.value.version : null;
33
+ }
34
+ return {
35
+ async put(type, opts) {
36
+ const validation = type.schema.safeParse(opts.content);
37
+ if (!validation.success) {
38
+ return err({
39
+ kind: 'validation_error',
40
+ cause: validation.error,
41
+ message: validation.error.message
42
+ });
43
+ }
44
+ const id = generate_observation_id();
45
+ const row = create_observation_row(id, type.name, opts.source, validation.data, {
46
+ confidence: opts.confidence,
47
+ observed_at: opts.observed_at,
48
+ derived_from: opts.derived_from
49
+ });
50
+ const result = await storage.put_row(row);
51
+ if (!result.ok)
52
+ return result;
53
+ const observation = {
54
+ id,
55
+ type: type.name,
56
+ source: opts.source,
57
+ content: validation.data,
58
+ ...(opts.confidence !== undefined && { confidence: opts.confidence }),
59
+ ...(opts.observed_at && { observed_at: opts.observed_at }),
60
+ created_at: new Date(row.created_at),
61
+ ...(opts.derived_from && { derived_from: opts.derived_from })
62
+ };
63
+ return ok(observation);
64
+ },
65
+ async get(id) {
66
+ const result = await storage.get_row(id);
67
+ if (!result.ok)
68
+ return result;
69
+ if (!result.value) {
70
+ return err({ kind: 'observation_not_found', id });
71
+ }
72
+ return ok(row_to_observation(result.value));
73
+ },
74
+ async *query(opts = {}) {
75
+ const storageOpts = to_storage_opts(opts);
76
+ for await (const row of storage.query_rows(storageOpts)) {
77
+ if (!opts.include_stale) {
78
+ const latest = await get_latest_version(row.source_store_id);
79
+ if (latest && row.source_version !== latest)
80
+ continue;
81
+ }
82
+ yield row_to_observation(row);
83
+ }
84
+ },
85
+ async *query_meta(opts = {}) {
86
+ const storageOpts = to_storage_opts(opts);
87
+ for await (const row of storage.query_rows(storageOpts)) {
88
+ if (!opts.include_stale) {
89
+ const latest = await get_latest_version(row.source_store_id);
90
+ if (latest && row.source_version !== latest)
91
+ continue;
92
+ }
93
+ yield row_to_meta(row);
94
+ }
95
+ },
96
+ async delete(id) {
97
+ const result = await storage.delete_row(id);
98
+ if (!result.ok)
99
+ return result;
100
+ if (!result.value) {
101
+ return err({ kind: 'observation_not_found', id });
102
+ }
103
+ return ok(undefined);
104
+ },
105
+ async delete_by_source(source) {
106
+ return storage.delete_by_source(source.store_id, source.version, source.path);
107
+ },
108
+ async is_stale(pointer) {
109
+ const latest = await get_latest_version(pointer.store_id);
110
+ if (!latest)
111
+ return false;
112
+ return pointer.version !== latest;
113
+ }
114
+ };
115
+ }
package/dist/observations/index.d.ts ADDED
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @module Observations
3
+ * @description Re-exports for the observations feature.
4
+ */
5
+ export * from './types';
6
+ export type { ObservationRow, ObservationInsert } from './schema';
7
+ export { corpus_observations } from './schema';
8
+ export type { ObservationsStorage, StorageQueryOpts, ObservationsCRUD } from './storage';
9
+ export { row_to_observation, row_to_meta, create_observation_row, filter_observation_rows, create_observations_storage } from './storage';
10
+ export { create_observations_client } from './client';
11
+ export * from './utils';
12
+ //# sourceMappingURL=index.d.ts.map
package/dist/observations/index.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../observations/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,cAAc,SAAS,CAAA;AACvB,YAAY,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAA;AACjE,OAAO,EAAE,mBAAmB,EAAE,MAAM,UAAU,CAAA;AAC9C,YAAY,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAA;AAGxF,OAAO,EAAE,kBAAkB,EAAE,WAAW,EAAE,sBAAsB,EAAE,uBAAuB,EAAE,2BAA2B,EAAE,MAAM,WAAW,CAAA;AACzI,OAAO,EAAE,0BAA0B,EAAE,MAAM,UAAU,CAAA;AACrD,cAAc,SAAS,CAAA"}
package/dist/observations/index.js ADDED
@@ -0,0 +1,11 @@
1
+ /**
2
+ * @module Observations
3
+ * @description Re-exports for the observations feature.
4
+ */
5
+ // Types
6
+ export * from './types';
7
+ export { corpus_observations } from './schema';
8
+ // Functions
9
+ export { row_to_observation, row_to_meta, create_observation_row, filter_observation_rows, create_observations_storage } from './storage';
10
+ export { create_observations_client } from './client';
11
+ export * from './utils';
package/dist/observations/schema.d.ts ADDED
@@ -0,0 +1,267 @@
1
+ /**
2
+ * @module ObservationSchema
3
+ * @description Database schema definitions for observations using Drizzle ORM.
4
+ */
5
+ /**
6
+ * Drizzle ORM schema for the corpus_observations table.
7
+ *
8
+ * Used by the Cloudflare backend with D1 (SQLite). Defines the table structure
9
+ * for storing observation metadata linking structured facts to versioned content.
10
+ *
11
+ * Columns:
12
+ * - `id` - Primary key (unique observation identifier)
13
+ * - `type` - Observation type name (e.g., 'entity_mention')
14
+ * - `source_store_id` + `source_version` - Points to the snapshot this observation is about
15
+ * - `source_path` - Optional JSONPath expression to specific element
16
+ * - `source_span_start` / `source_span_end` - Optional character range within text
17
+ * - `content` - JSON-encoded observation data
18
+ * - `confidence` - Optional confidence score (0.0 to 1.0)
19
+ * - `observed_at` - When the observation was made (ISO 8601)
20
+ * - `created_at` - When the record was stored (ISO 8601)
21
+ * - `derived_from` - Optional JSON array of SnapshotPointers for provenance
22
+ *
23
+ * @example
24
+ * ```ts
25
+ * import { drizzle } from 'drizzle-orm/d1'
26
+ * import { corpus_observations } from 'corpus/observation-schema'
27
+ *
28
+ * const db = drizzle(env.D1)
29
+ * const rows = await db.select().from(corpus_observations).where(eq(corpus_observations.type, 'entity_mention'))
30
+ * ```
31
+ */
32
+ export declare const corpus_observations: import("drizzle-orm/sqlite-core").SQLiteTableWithColumns<{
33
+ name: "corpus_observations";
34
+ schema: undefined;
35
+ columns: {
36
+ id: import("drizzle-orm/sqlite-core").SQLiteColumn<{
37
+ name: "id";
38
+ tableName: "corpus_observations";
39
+ dataType: "string";
40
+ columnType: "SQLiteText";
41
+ data: string;
42
+ driverParam: string;
43
+ notNull: true;
44
+ hasDefault: false;
45
+ isPrimaryKey: true;
46
+ isAutoincrement: false;
47
+ hasRuntimeDefault: false;
48
+ enumValues: [string, ...string[]];
49
+ baseColumn: never;
50
+ identity: undefined;
51
+ generated: undefined;
52
+ }, {}, {
53
+ length: number | undefined;
54
+ }>;
55
+ type: import("drizzle-orm/sqlite-core").SQLiteColumn<{
56
+ name: "type";
57
+ tableName: "corpus_observations";
58
+ dataType: "string";
59
+ columnType: "SQLiteText";
60
+ data: string;
61
+ driverParam: string;
62
+ notNull: true;
63
+ hasDefault: false;
64
+ isPrimaryKey: false;
65
+ isAutoincrement: false;
66
+ hasRuntimeDefault: false;
67
+ enumValues: [string, ...string[]];
68
+ baseColumn: never;
69
+ identity: undefined;
70
+ generated: undefined;
71
+ }, {}, {
72
+ length: number | undefined;
73
+ }>;
74
+ source_store_id: import("drizzle-orm/sqlite-core").SQLiteColumn<{
75
+ name: "source_store_id";
76
+ tableName: "corpus_observations";
77
+ dataType: "string";
78
+ columnType: "SQLiteText";
79
+ data: string;
80
+ driverParam: string;
81
+ notNull: true;
82
+ hasDefault: false;
83
+ isPrimaryKey: false;
84
+ isAutoincrement: false;
85
+ hasRuntimeDefault: false;
86
+ enumValues: [string, ...string[]];
87
+ baseColumn: never;
88
+ identity: undefined;
89
+ generated: undefined;
90
+ }, {}, {
91
+ length: number | undefined;
92
+ }>;
93
+ source_version: import("drizzle-orm/sqlite-core").SQLiteColumn<{
94
+ name: "source_version";
95
+ tableName: "corpus_observations";
96
+ dataType: "string";
97
+ columnType: "SQLiteText";
98
+ data: string;
99
+ driverParam: string;
100
+ notNull: true;
101
+ hasDefault: false;
102
+ isPrimaryKey: false;
103
+ isAutoincrement: false;
104
+ hasRuntimeDefault: false;
105
+ enumValues: [string, ...string[]];
106
+ baseColumn: never;
107
+ identity: undefined;
108
+ generated: undefined;
109
+ }, {}, {
110
+ length: number | undefined;
111
+ }>;
112
+ source_path: import("drizzle-orm/sqlite-core").SQLiteColumn<{
113
+ name: "source_path";
114
+ tableName: "corpus_observations";
115
+ dataType: "string";
116
+ columnType: "SQLiteText";
117
+ data: string;
118
+ driverParam: string;
119
+ notNull: false;
120
+ hasDefault: false;
121
+ isPrimaryKey: false;
122
+ isAutoincrement: false;
123
+ hasRuntimeDefault: false;
124
+ enumValues: [string, ...string[]];
125
+ baseColumn: never;
126
+ identity: undefined;
127
+ generated: undefined;
128
+ }, {}, {
129
+ length: number | undefined;
130
+ }>;
131
+ source_span_start: import("drizzle-orm/sqlite-core").SQLiteColumn<{
132
+ name: "source_span_start";
133
+ tableName: "corpus_observations";
134
+ dataType: "string";
135
+ columnType: "SQLiteText";
136
+ data: string;
137
+ driverParam: string;
138
+ notNull: false;
139
+ hasDefault: false;
140
+ isPrimaryKey: false;
141
+ isAutoincrement: false;
142
+ hasRuntimeDefault: false;
143
+ enumValues: [string, ...string[]];
144
+ baseColumn: never;
145
+ identity: undefined;
146
+ generated: undefined;
147
+ }, {}, {
148
+ length: number | undefined;
149
+ }>;
150
+ source_span_end: import("drizzle-orm/sqlite-core").SQLiteColumn<{
151
+ name: "source_span_end";
152
+ tableName: "corpus_observations";
153
+ dataType: "string";
154
+ columnType: "SQLiteText";
155
+ data: string;
156
+ driverParam: string;
157
+ notNull: false;
158
+ hasDefault: false;
159
+ isPrimaryKey: false;
160
+ isAutoincrement: false;
161
+ hasRuntimeDefault: false;
162
+ enumValues: [string, ...string[]];
163
+ baseColumn: never;
164
+ identity: undefined;
165
+ generated: undefined;
166
+ }, {}, {
167
+ length: number | undefined;
168
+ }>;
169
+ content: import("drizzle-orm/sqlite-core").SQLiteColumn<{
170
+ name: "content";
171
+ tableName: "corpus_observations";
172
+ dataType: "string";
173
+ columnType: "SQLiteText";
174
+ data: string;
175
+ driverParam: string;
176
+ notNull: true;
177
+ hasDefault: false;
178
+ isPrimaryKey: false;
179
+ isAutoincrement: false;
180
+ hasRuntimeDefault: false;
181
+ enumValues: [string, ...string[]];
182
+ baseColumn: never;
183
+ identity: undefined;
184
+ generated: undefined;
185
+ }, {}, {
186
+ length: number | undefined;
187
+ }>;
188
+ confidence: import("drizzle-orm/sqlite-core").SQLiteColumn<{
189
+ name: "confidence";
190
+ tableName: "corpus_observations";
191
+ dataType: "number";
192
+ columnType: "SQLiteReal";
193
+ data: number;
194
+ driverParam: number;
195
+ notNull: false;
196
+ hasDefault: false;
197
+ isPrimaryKey: false;
198
+ isAutoincrement: false;
199
+ hasRuntimeDefault: false;
200
+ enumValues: undefined;
201
+ baseColumn: never;
202
+ identity: undefined;
203
+ generated: undefined;
204
+ }, {}, {}>;
205
+ observed_at: import("drizzle-orm/sqlite-core").SQLiteColumn<{
206
+ name: "observed_at";
207
+ tableName: "corpus_observations";
208
+ dataType: "string";
209
+ columnType: "SQLiteText";
210
+ data: string;
211
+ driverParam: string;
212
+ notNull: false;
213
+ hasDefault: false;
214
+ isPrimaryKey: false;
215
+ isAutoincrement: false;
216
+ hasRuntimeDefault: false;
217
+ enumValues: [string, ...string[]];
218
+ baseColumn: never;
219
+ identity: undefined;
220
+ generated: undefined;
221
+ }, {}, {
222
+ length: number | undefined;
223
+ }>;
224
+ created_at: import("drizzle-orm/sqlite-core").SQLiteColumn<{
225
+ name: "created_at";
226
+ tableName: "corpus_observations";
227
+ dataType: "string";
228
+ columnType: "SQLiteText";
229
+ data: string;
230
+ driverParam: string;
231
+ notNull: true;
232
+ hasDefault: false;
233
+ isPrimaryKey: false;
234
+ isAutoincrement: false;
235
+ hasRuntimeDefault: false;
236
+ enumValues: [string, ...string[]];
237
+ baseColumn: never;
238
+ identity: undefined;
239
+ generated: undefined;
240
+ }, {}, {
241
+ length: number | undefined;
242
+ }>;
243
+ derived_from: import("drizzle-orm/sqlite-core").SQLiteColumn<{
244
+ name: "derived_from";
245
+ tableName: "corpus_observations";
246
+ dataType: "string";
247
+ columnType: "SQLiteText";
248
+ data: string;
249
+ driverParam: string;
250
+ notNull: false;
251
+ hasDefault: false;
252
+ isPrimaryKey: false;
253
+ isAutoincrement: false;
254
+ hasRuntimeDefault: false;
255
+ enumValues: [string, ...string[]];
256
+ baseColumn: never;
257
+ identity: undefined;
258
+ generated: undefined;
259
+ }, {}, {
260
+ length: number | undefined;
261
+ }>;
262
+ };
263
+ dialect: "sqlite";
264
+ }>;
265
+ export type ObservationRow = typeof corpus_observations.$inferSelect;
266
+ export type ObservationInsert = typeof corpus_observations.$inferInsert;
267
+ //# sourceMappingURL=schema.d.ts.map
package/dist/observations/schema.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../observations/schema.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EA0B7B,CAAA;AAEH,MAAM,MAAM,cAAc,GAAG,OAAO,mBAAmB,CAAC,YAAY,CAAA;AACpE,MAAM,MAAM,iBAAiB,GAAG,OAAO,mBAAmB,CAAC,YAAY,CAAA"}
package/dist/observations/schema.js ADDED
@@ -0,0 +1,55 @@
1
+ /**
2
+ * @module ObservationSchema
3
+ * @description Database schema definitions for observations using Drizzle ORM.
4
+ */
5
+ import { sqliteTable, text, real, index } from 'drizzle-orm/sqlite-core';
6
+ /**
7
+ * Drizzle ORM schema for the corpus_observations table.
8
+ *
9
+ * Used by the Cloudflare backend with D1 (SQLite). Defines the table structure
10
+ * for storing observation metadata linking structured facts to versioned content.
11
+ *
12
+ * Columns:
13
+ * - `id` - Primary key (unique observation identifier)
14
+ * - `type` - Observation type name (e.g., 'entity_mention')
15
+ * - `source_store_id` + `source_version` - Points to the snapshot this observation is about
16
+ * - `source_path` - Optional JSONPath expression to specific element
17
+ * - `source_span_start` / `source_span_end` - Optional character range within text
18
+ * - `content` - JSON-encoded observation data
19
+ * - `confidence` - Optional confidence score (0.0 to 1.0)
20
+ * - `observed_at` - When the observation was made (ISO 8601)
21
+ * - `created_at` - When the record was stored (ISO 8601)
22
+ * - `derived_from` - Optional JSON array of SnapshotPointers for provenance
23
+ *
24
+ * @example
25
+ * ```ts
26
+ * import { drizzle } from 'drizzle-orm/d1'
27
+ * import { corpus_observations } from 'corpus/observation-schema'
28
+ *
29
+ * const db = drizzle(env.D1)
30
+ * const rows = await db.select().from(corpus_observations).where(eq(corpus_observations.type, 'entity_mention'))
31
+ * ```
32
+ */
33
+ export const corpus_observations = sqliteTable('corpus_observations', {
34
+ id: text('id').primaryKey(),
35
+ type: text('type').notNull(),
36
+ // Source pointer
37
+ source_store_id: text('source_store_id').notNull(),
38
+ source_version: text('source_version').notNull(),
39
+ source_path: text('source_path'),
40
+ source_span_start: text('source_span_start'),
41
+ source_span_end: text('source_span_end'),
42
+ // Content
43
+ content: text('content').notNull(),
44
+ confidence: real('confidence'),
45
+ // Timestamps
46
+ observed_at: text('observed_at'),
47
+ created_at: text('created_at').notNull(),
48
+ // Derivation lineage
49
+ derived_from: text('derived_from'),
50
+ }, (table) => ({
51
+ type_idx: index('idx_obs_type').on(table.type),
52
+ source_idx: index('idx_obs_source').on(table.source_store_id, table.source_version),
53
+ type_observed_idx: index('idx_obs_type_observed').on(table.type, table.observed_at),
54
+ type_source_idx: index('idx_obs_type_source').on(table.type, table.source_store_id),
55
+ }));
package/dist/observations/storage.d.ts ADDED
@@ -0,0 +1,75 @@
1
+ /**
2
+ * @module ObservationsStorage
3
+ * @description Raw storage interface and row conversion utilities for observations.
4
+ */
5
+ import type { Result, CorpusError } from '../types';
6
+ import type { ObservationRow } from './schema';
7
+ import type { Observation, ObservationMeta, SnapshotPointer } from './types';
8
+ /**
9
+ * Query options for raw storage operations.
10
+ * Dates are ISO strings at the storage layer.
11
+ */
12
+ export type StorageQueryOpts = {
13
+ type?: string | string[];
14
+ source_store_id?: string;
15
+ source_version?: string;
16
+ source_prefix?: string;
17
+ created_after?: string;
18
+ created_before?: string;
19
+ observed_after?: string;
20
+ observed_before?: string;
21
+ limit?: number;
22
+ };
23
+ /**
24
+ * Raw storage adapter for observation rows.
25
+ * Backends implement this thin interface; all business logic lives in the client.
26
+ */
27
+ export type ObservationsStorage = {
28
+ /** Store a row. Returns the row on success. */
29
+ put_row: (row: ObservationRow) => Promise<Result<ObservationRow, CorpusError>>;
30
+ /** Get a row by ID. Returns null if not found. */
31
+ get_row: (id: string) => Promise<Result<ObservationRow | null, CorpusError>>;
32
+ /** Query rows with optional filters. */
33
+ query_rows: (opts?: StorageQueryOpts) => AsyncIterable<ObservationRow>;
34
+ /** Delete a row by ID. Returns true if deleted, false if not found. */
35
+ delete_row: (id: string) => Promise<Result<boolean, CorpusError>>;
36
+ /** Delete rows matching source. Returns count deleted. */
37
+ delete_by_source: (store_id: string, version: string, path?: string) => Promise<Result<number, CorpusError>>;
38
+ };
39
+ /**
40
+ * Convert a storage row to an Observation (includes content).
41
+ */
42
+ export declare function row_to_observation(row: ObservationRow): Observation;
43
+ /**
44
+ * Convert a storage row to ObservationMeta (excludes content).
45
+ */
46
+ export declare function row_to_meta(row: ObservationRow): ObservationMeta;
47
+ /**
48
+ * Create an ObservationRow from put options.
49
+ */
50
+ export declare function create_observation_row(id: string, type_name: string, source: SnapshotPointer, content: unknown, opts: {
51
+ confidence?: number;
52
+ observed_at?: Date;
53
+ derived_from?: SnapshotPointer[];
54
+ }): ObservationRow;
55
+ /**
56
+ * Filter and sort observation rows based on query options.
57
+ * Used by in-memory storage implementations (memory backend, file backend).
58
+ */
59
+ export declare function filter_observation_rows(rows: ObservationRow[], opts?: StorageQueryOpts): ObservationRow[];
60
+ /**
61
+ * Simple CRUD interface for observation storage backends.
62
+ */
63
+ export type ObservationsCRUD = {
64
+ get_all: () => Promise<ObservationRow[]>;
65
+ set_all: (rows: ObservationRow[]) => Promise<void>;
66
+ get_one: (id: string) => Promise<ObservationRow | null>;
67
+ add_one: (row: ObservationRow) => Promise<void>;
68
+ remove_one: (id: string) => Promise<boolean>;
69
+ };
70
+ /**
71
+ * Create an ObservationsStorage from simple CRUD operations.
72
+ * Used by memory and file backends.
73
+ */
74
+ export declare function create_observations_storage(crud: ObservationsCRUD): ObservationsStorage;
75
+ //# sourceMappingURL=storage.d.ts.map