@mepuka/skygent 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -0
- package/index.ts +146 -0
- package/package.json +56 -0
- package/src/cli/app.ts +75 -0
- package/src/cli/config-command.ts +140 -0
- package/src/cli/config.ts +91 -0
- package/src/cli/derive.ts +205 -0
- package/src/cli/doc/annotation.ts +36 -0
- package/src/cli/doc/filter.ts +69 -0
- package/src/cli/doc/index.ts +9 -0
- package/src/cli/doc/post.ts +155 -0
- package/src/cli/doc/primitives.ts +25 -0
- package/src/cli/doc/render.ts +18 -0
- package/src/cli/doc/table.ts +114 -0
- package/src/cli/doc/thread.ts +46 -0
- package/src/cli/doc/tree.ts +126 -0
- package/src/cli/errors.ts +59 -0
- package/src/cli/exit-codes.ts +52 -0
- package/src/cli/feed.ts +177 -0
- package/src/cli/filter-dsl.ts +1411 -0
- package/src/cli/filter-errors.ts +208 -0
- package/src/cli/filter-help.ts +70 -0
- package/src/cli/filter-input.ts +54 -0
- package/src/cli/filter.ts +435 -0
- package/src/cli/graph.ts +472 -0
- package/src/cli/help.ts +14 -0
- package/src/cli/interval.ts +35 -0
- package/src/cli/jetstream.ts +173 -0
- package/src/cli/layers.ts +180 -0
- package/src/cli/logging.ts +136 -0
- package/src/cli/output-format.ts +26 -0
- package/src/cli/output.ts +82 -0
- package/src/cli/parse.ts +80 -0
- package/src/cli/post.ts +193 -0
- package/src/cli/preferences.ts +11 -0
- package/src/cli/query-fields.ts +247 -0
- package/src/cli/query.ts +415 -0
- package/src/cli/range.ts +44 -0
- package/src/cli/search.ts +465 -0
- package/src/cli/shared-options.ts +169 -0
- package/src/cli/shared.ts +20 -0
- package/src/cli/store-errors.ts +80 -0
- package/src/cli/store-tree.ts +392 -0
- package/src/cli/store.ts +395 -0
- package/src/cli/sync-factory.ts +107 -0
- package/src/cli/sync.ts +366 -0
- package/src/cli/view-thread.ts +196 -0
- package/src/cli/view.ts +47 -0
- package/src/cli/watch.ts +344 -0
- package/src/db/migrations/store-catalog/001_init.ts +14 -0
- package/src/db/migrations/store-index/001_init.ts +34 -0
- package/src/db/migrations/store-index/002_event_log.ts +24 -0
- package/src/db/migrations/store-index/003_fts_and_derived.ts +52 -0
- package/src/db/migrations/store-index/004_query_indexes.ts +9 -0
- package/src/db/migrations/store-index/005_post_lang.ts +15 -0
- package/src/db/migrations/store-index/006_has_embed.ts +10 -0
- package/src/db/migrations/store-index/007_event_seq_and_checkpoints.ts +68 -0
- package/src/domain/bsky.ts +467 -0
- package/src/domain/config.ts +11 -0
- package/src/domain/credentials.ts +6 -0
- package/src/domain/defaults.ts +8 -0
- package/src/domain/derivation.ts +55 -0
- package/src/domain/errors.ts +71 -0
- package/src/domain/events.ts +55 -0
- package/src/domain/extract.ts +64 -0
- package/src/domain/filter-describe.ts +551 -0
- package/src/domain/filter-explain.ts +9 -0
- package/src/domain/filter.ts +797 -0
- package/src/domain/format.ts +91 -0
- package/src/domain/index.ts +13 -0
- package/src/domain/indexes.ts +17 -0
- package/src/domain/policies.ts +16 -0
- package/src/domain/post.ts +88 -0
- package/src/domain/primitives.ts +50 -0
- package/src/domain/raw.ts +140 -0
- package/src/domain/store.ts +103 -0
- package/src/domain/sync.ts +211 -0
- package/src/domain/text-width.ts +56 -0
- package/src/services/app-config.ts +278 -0
- package/src/services/bsky-client.ts +2113 -0
- package/src/services/credential-store.ts +408 -0
- package/src/services/derivation-engine.ts +502 -0
- package/src/services/derivation-settings.ts +61 -0
- package/src/services/derivation-validator.ts +68 -0
- package/src/services/filter-compiler.ts +269 -0
- package/src/services/filter-library.ts +371 -0
- package/src/services/filter-runtime.ts +821 -0
- package/src/services/filter-settings.ts +30 -0
- package/src/services/identity-resolver.ts +563 -0
- package/src/services/jetstream-sync.ts +636 -0
- package/src/services/lineage-store.ts +89 -0
- package/src/services/link-validator.ts +244 -0
- package/src/services/output-manager.ts +274 -0
- package/src/services/post-parser.ts +62 -0
- package/src/services/profile-resolver.ts +223 -0
- package/src/services/resource-monitor.ts +106 -0
- package/src/services/shared.ts +69 -0
- package/src/services/store-cleaner.ts +43 -0
- package/src/services/store-commit.ts +168 -0
- package/src/services/store-db.ts +248 -0
- package/src/services/store-event-log.ts +285 -0
- package/src/services/store-index-sql.ts +289 -0
- package/src/services/store-index.ts +1152 -0
- package/src/services/store-keys.ts +4 -0
- package/src/services/store-manager.ts +358 -0
- package/src/services/store-stats.ts +522 -0
- package/src/services/store-writer.ts +200 -0
- package/src/services/sync-checkpoint-store.ts +169 -0
- package/src/services/sync-engine.ts +547 -0
- package/src/services/sync-reporter.ts +16 -0
- package/src/services/sync-settings.ts +72 -0
- package/src/services/trending-topics.ts +226 -0
- package/src/services/view-checkpoint-store.ts +238 -0
- package/src/typeclass/chunk.ts +84 -0
|
@@ -0,0 +1,1152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* StoreIndex Service - SQLite-based Indexing for Posts
|
|
3
|
+
*
|
|
4
|
+
* This module provides the StoreIndex service, which manages SQLite-based indexing
|
|
5
|
+
* of posts within a store. The index maintains a synchronized view of all posts
|
|
6
|
+
* based on events from the StoreEventLog, enabling efficient querying, filtering,
|
|
7
|
+
* and full-text search capabilities.
|
|
8
|
+
*
|
|
9
|
+
* ## Architecture
|
|
10
|
+
*
|
|
11
|
+
* The StoreIndex uses SQLite as its backing store and maintains several tables:
|
|
12
|
+
* - `posts`: Main post data with JSON serialization, engagement metrics, and metadata
|
|
13
|
+
* - `post_hashtag`: Many-to-many relationship for hashtags
|
|
14
|
+
* - `post_lang`: Language tags for posts
|
|
15
|
+
* - `posts_fts`: Full-text search index using SQLite FTS5
|
|
16
|
+
* - `index_checkpoints`: Tracks indexing progress for incremental rebuilds
|
|
17
|
+
*
|
|
18
|
+
* ## Query Pushdown (Predicate Pushdown)
|
|
19
|
+
*
|
|
20
|
+
* The index implements query pushdown (also known as predicate pushdown) to optimize
|
|
21
|
+
* query performance. This means filter expressions are translated into SQL WHERE clauses
|
|
22
|
+
* where possible, reducing the amount of data that needs to be fetched and processed.
|
|
23
|
+
*
|
|
24
|
+
* The {@link PushdownExpr} type represents expressions that can be pushed down to SQLite.
|
|
25
|
+
* Not all filter expressions can be pushed down (e.g., some complex predicates must be
|
|
26
|
+
* evaluated in-memory), so the pushdown system identifies which constraints can be
|
|
27
|
+
* handled by the database layer.
|
|
28
|
+
*
|
|
29
|
+
* ### Pushdown Expression Types
|
|
30
|
+
*
|
|
31
|
+
* - **Logical**: True, False, And, Or
|
|
32
|
+
* - **Author filters**: Author (single), AuthorIn (multiple handles)
|
|
33
|
+
* - **Content filters**: Hashtag, HashtagIn, Contains (text search)
|
|
34
|
+
* - **Temporal filters**: DateRange (created_at bounds)
|
|
35
|
+
* - **Post type filters**: IsReply, IsQuote, IsRepost, IsOriginal
|
|
36
|
+
* - **Media filters**: HasLinks, HasMedia, HasEmbed, HasImages, HasVideo
|
|
37
|
+
* - **Language filters**: Language (post language matching)
|
|
38
|
+
* - **Engagement filters**: Engagement (min likes/reposts/replies)
|
|
39
|
+
*
|
|
40
|
+
* ## Key Features
|
|
41
|
+
*
|
|
42
|
+
* 1. **Event-driven indexing**: Reacts to PostUpsert and PostDelete events from the event log
|
|
43
|
+
* 2. **Incremental rebuilds**: Checkpoint system enables resuming indexing without reprocessing
|
|
44
|
+
* 3. **Full-text search**: SQLite FTS5 integration for text search across post content
|
|
45
|
+
* 4. **Efficient querying**: Pushdown predicates reduce data transfer and improve performance
|
|
46
|
+
* 5. **Streaming results**: Large result sets are streamed using pagination
|
|
47
|
+
* 6. **Bootstrap on first access**: Automatic index initialization when first accessed
|
|
48
|
+
*
|
|
49
|
+
* ## Dependencies
|
|
50
|
+
*
|
|
51
|
+
* - {@link StoreDb}: Provides SQLite client connections per store
|
|
52
|
+
* - {@link StoreEventLog}: Source of truth for post events, used during rebuilds
|
|
53
|
+
*
|
|
54
|
+
* @module StoreIndex
|
|
55
|
+
*/
|
|
56
|
+
|
|
57
|
+
import { Chunk, Clock, Context, Effect, Layer, Option, Ref, Schema, Stream } from "effect";
|
|
58
|
+
import * as SqlClient from "@effect/sql/SqlClient";
|
|
59
|
+
import * as SqlSchema from "@effect/sql/SqlSchema";
|
|
60
|
+
import type { Fragment } from "@effect/sql/Statement";
|
|
61
|
+
import { StoreIndexError } from "../domain/errors.js";
|
|
62
|
+
import { PostEventRecord } from "../domain/events.js";
|
|
63
|
+
import type { PostEvent, StoreQuery } from "../domain/events.js";
|
|
64
|
+
import type { FilterExpr } from "../domain/filter.js";
|
|
65
|
+
import { IndexCheckpoint, PostIndexEntry } from "../domain/indexes.js";
|
|
66
|
+
import { EventSeq, Handle, PostUri, Timestamp } from "../domain/primitives.js";
|
|
67
|
+
import { Post } from "../domain/post.js";
|
|
68
|
+
import type { StoreRef } from "../domain/store.js";
|
|
69
|
+
import { StoreDb } from "./store-db.js";
|
|
70
|
+
import { StoreEventLog } from "./store-event-log.js";
|
|
71
|
+
import { deletePost, upsertPost } from "./store-index-sql.js";
|
|
72
|
+
|
|
73
|
+
const indexName = "primary";
|
|
74
|
+
const entryPageSize = 500;
|
|
75
|
+
|
|
76
|
+
type SearchSort = "relevance" | "newest" | "oldest";
|
|
77
|
+
type QueryCursorState = {
|
|
78
|
+
readonly lastCreatedAt: string | undefined;
|
|
79
|
+
readonly lastUri: string | undefined;
|
|
80
|
+
readonly fetched: number;
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
const postUriRow = Schema.Struct({ uri: PostUri });
|
|
84
|
+
const postJsonRow = Schema.Struct({ post_json: Schema.String });
|
|
85
|
+
const postEntryRow = Schema.Struct({
|
|
86
|
+
uri: PostUri,
|
|
87
|
+
created_date: Schema.String,
|
|
88
|
+
author: Schema.NullOr(Handle),
|
|
89
|
+
hashtags: Schema.NullOr(Schema.String)
|
|
90
|
+
});
|
|
91
|
+
const checkpointRow = Schema.Struct({
|
|
92
|
+
index_name: Schema.String,
|
|
93
|
+
version: Schema.Number,
|
|
94
|
+
last_event_seq: EventSeq,
|
|
95
|
+
event_count: Schema.Number,
|
|
96
|
+
updated_at: Schema.String
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
/**
 * Creates an error mapper that normalizes failures into `StoreIndexError`.
 *
 * @param message - Message attached when a foreign failure has to be wrapped.
 * @returns A function that returns an existing `StoreIndexError` unchanged and
 *          wraps any other value in a new `StoreIndexError`, keeping the
 *          original value as `cause`.
 */
const toStoreIndexError = (message: string) => (cause: unknown) => {
  if (cause instanceof StoreIndexError) {
    // Already in the service's error type: keep its own message and cause.
    return cause;
  }
  return StoreIndexError.make({ message, cause });
};
|
|
103
|
+
|
|
104
|
+
/**
 * Parses a JSON string and decodes it against the `Post` schema.
 *
 * @param raw - JSON text to decode.
 * @returns Effect yielding the decoded `Post`; any failure is mapped through
 *          `toStoreIndexError` with the message "StoreIndex.post decode failed".
 */
const decodePostJson = (raw: string) => {
  const decodePost = Schema.decodeUnknown(Schema.parseJson(Post));
  const wrapFailure = toStoreIndexError("StoreIndex.post decode failed");
  return decodePost(raw).pipe(Effect.mapError(wrapFailure));
};
|
|
108
|
+
|
|
109
|
+
/**
 * Matches the FTS5 keyword operators as whole words.
 *
 * Intentionally case-sensitive: FTS5 only recognizes the uppercase spellings
 * as operators. Lowercase "and" / "or" / "not" / "near" in ordinary text are
 * plain search terms and must not be classified as query syntax.
 */
const ftsOperatorPattern = /\b(AND|OR|NOT|NEAR)\b/;
/** Matches the punctuation this module treats as explicit FTS query syntax. */
const ftsSyntaxPattern = /["*():^]/;

/**
 * Reports whether a search string appears to use explicit FTS query syntax.
 *
 * @param query - Raw search text.
 * @returns `true` when the text contains an uppercase keyword operator
 *          (AND, OR, NOT, NEAR) as a whole word, or any of the characters
 *          `" * ( ) : ^`; `false` otherwise.
 */
const hasFtsSyntax = (query: string) =>
  ftsOperatorPattern.test(query) || ftsSyntaxPattern.test(query);
|
|
114
|
+
|
|
115
|
+
/**
 * Turns free text into a query where every word is a quoted literal.
 *
 * The input is trimmed and split on whitespace. Each word is wrapped in double
 * quotes, with any embedded double quote doubled, and the quoted words are
 * joined with " AND ".
 *
 * @param query - Raw search text.
 * @returns The quoted, AND-joined query, or "" when the input has no words.
 */
const buildLiteralFtsQuery = (query: string) => {
  const quoted: Array<string> = [];
  for (const word of query.trim().split(/\s+/)) {
    if (word.length === 0) {
      // Splitting an empty string yields a single empty piece; skip it.
      continue;
    }
    const escaped = word.split("\"").join("\"\"");
    quoted.push(`"${escaped}"`);
  }
  return quoted.join(" AND ");
};
|
|
124
|
+
|
|
125
|
+
/**
 * Converts a raw index-entry row into a `PostIndexEntry`.
 *
 * The `hashtags` column is split on "," into a list; a null or empty value
 * becomes an empty list. A null `author` is passed on as `undefined`.
 *
 * @param row - Row matching `postEntryRow`.
 * @returns Effect yielding the decoded entry; decode failures are mapped
 *          through `toStoreIndexError` with "StoreIndex.entry decode failed".
 */
const decodeEntryRow = (row: typeof postEntryRow.Type) => {
  const tagList = row.hashtags ? row.hashtags.split(",") : [];
  const candidate = {
    uri: row.uri,
    createdDate: row.created_date,
    hashtags: tagList,
    author: row.author ?? undefined
  };
  const wrapFailure = toStoreIndexError("StoreIndex.entry decode failed");
  return Schema.decodeUnknown(PostIndexEntry)(candidate).pipe(
    Effect.mapError(wrapFailure)
  );
};
|
|
132
|
+
|
|
133
|
+
/**
 * Converts a raw checkpoint row into an `IndexCheckpoint`.
 *
 * Renames the snake_case column names to the camelCase fields the
 * `IndexCheckpoint` schema is decoded from.
 *
 * @param row - Row matching `checkpointRow`.
 * @returns Effect yielding the decoded checkpoint; decode failures are mapped
 *          through `toStoreIndexError` with "StoreIndex.checkpoint decode failed".
 */
const decodeCheckpointRow = (row: typeof checkpointRow.Type) => {
  const candidate = {
    index: row.index_name,
    version: row.version,
    lastEventSeq: row.last_event_seq,
    eventCount: row.event_count,
    updatedAt: row.updated_at
  };
  const wrapFailure = toStoreIndexError("StoreIndex.checkpoint decode failed");
  return Schema.decodeUnknown(IndexCheckpoint)(candidate).pipe(
    Effect.mapError(wrapFailure)
  );
};
|
|
141
|
+
|
|
142
|
+
/**
 * Formats a date value as an ISO 8601 string via `Date.prototype.toISOString`.
 *
 * @param value - A `Date`, or a string that is first passed to `new Date(...)`.
 * @returns The ISO string for the given instant.
 */
const toIso = (value: Date | string) => {
  const asDate = value instanceof Date ? value : new Date(value);
  return asDate.toISOString();
};
|
|
144
|
+
|
|
145
|
+
/**
|
|
146
|
+
* Pushdown Expression - Represents filter predicates that can be pushed down to SQLite
|
|
147
|
+
*
|
|
148
|
+
* Pushdown expressions are a subset of filter expressions that can be directly
|
|
149
|
+
* translated into SQL WHERE clauses. This enables the database to do the heavy
|
|
150
|
+
* lifting of filtering, reducing memory usage and improving query performance.
|
|
151
|
+
*
|
|
152
|
+
* The expression tree is built from {@link FilterExpr} via {@link buildPushdown}
|
|
153
|
+
* and then converted to SQL fragments via {@link pushdownToSql}.
|
|
154
|
+
*
|
|
155
|
+
* @example
|
|
156
|
+
* // A complex filter becomes a pushdown expression:
|
|
157
|
+
* // Author("alice") AND Hashtag("tech") ->
|
|
158
|
+
* // { _tag: "And", clauses: [
|
|
159
|
+
* // { _tag: "Author", handle: "alice" },
|
|
160
|
+
* // { _tag: "Hashtag", tag: "tech" }
|
|
161
|
+
* // ]}
|
|
162
|
+
*/
|
|
163
|
+
type PushdownExpr =
|
|
164
|
+
/** Always true, no filtering needed */
|
|
165
|
+
| { readonly _tag: "True" }
|
|
166
|
+
/** Always false, returns no results */
|
|
167
|
+
| { readonly _tag: "False" }
|
|
168
|
+
/** Logical AND of multiple clauses */
|
|
169
|
+
| { readonly _tag: "And"; readonly clauses: ReadonlyArray<PushdownExpr> }
|
|
170
|
+
/** Logical OR of multiple clauses */
|
|
171
|
+
| { readonly _tag: "Or"; readonly clauses: ReadonlyArray<PushdownExpr> }
|
|
172
|
+
/** Filter by single author handle */
|
|
173
|
+
| { readonly _tag: "Author"; readonly handle: string }
|
|
174
|
+
/** Filter by multiple author handles */
|
|
175
|
+
| { readonly _tag: "AuthorIn"; readonly handles: ReadonlyArray<string> }
|
|
176
|
+
/** Filter by single hashtag */
|
|
177
|
+
| { readonly _tag: "Hashtag"; readonly tag: string }
|
|
178
|
+
/** Filter by multiple hashtags */
|
|
179
|
+
| { readonly _tag: "HashtagIn"; readonly tags: ReadonlyArray<string> }
|
|
180
|
+
/** Filter by creation date range (inclusive) */
|
|
181
|
+
| { readonly _tag: "DateRange"; readonly start: Timestamp; readonly end: Timestamp }
|
|
182
|
+
/** Filter for reply posts only */
|
|
183
|
+
| { readonly _tag: "IsReply" }
|
|
184
|
+
/** Filter for quote posts only */
|
|
185
|
+
| { readonly _tag: "IsQuote" }
|
|
186
|
+
/** Filter for reposts only */
|
|
187
|
+
| { readonly _tag: "IsRepost" }
|
|
188
|
+
/** Filter for original posts only (not replies, quotes, or reposts) */
|
|
189
|
+
| { readonly _tag: "IsOriginal" }
|
|
190
|
+
/** Filter for posts containing links */
|
|
191
|
+
| { readonly _tag: "HasLinks" }
|
|
192
|
+
/** Filter for posts with any media */
|
|
193
|
+
| { readonly _tag: "HasMedia" }
|
|
194
|
+
/** Filter for posts with any embed */
|
|
195
|
+
| { readonly _tag: "HasEmbed" }
|
|
196
|
+
/** Filter for posts with images */
|
|
197
|
+
| { readonly _tag: "HasImages" }
|
|
198
|
+
/** Filter for posts with video */
|
|
199
|
+
| { readonly _tag: "HasVideo" }
|
|
200
|
+
/** Filter by post language */
|
|
201
|
+
| { readonly _tag: "Language"; readonly langs: ReadonlyArray<string> }
|
|
202
|
+
/** Filter by engagement metrics (likes, reposts, replies) */
|
|
203
|
+
| { readonly _tag: "Engagement"; readonly minLikes?: number; readonly minReposts?: number; readonly minReplies?: number }
|
|
204
|
+
/** Filter by text content (case-sensitive or insensitive) */
|
|
205
|
+
| { readonly _tag: "Contains"; readonly text: string; readonly caseSensitive: boolean };
|
|
206
|
+
|
|
207
|
+
const pushdownTrue: PushdownExpr = { _tag: "True" };
|
|
208
|
+
const pushdownFalse: PushdownExpr = { _tag: "False" };
|
|
209
|
+
|
|
210
|
+
/**
 * Builds the simplest expression equivalent to the conjunction of `clauses`.
 *
 * - Any `False` clause makes the whole conjunction `False`.
 * - `True` clauses are dropped.
 * - The children of a nested `And` are spliced into the result one level deep.
 *
 * @param clauses - Clauses to AND together.
 * @returns `True` when nothing remains, the clause itself when exactly one
 *          remains, otherwise an `And` node over the remaining clauses.
 */
const simplifyAnd = (clauses: ReadonlyArray<PushdownExpr>): PushdownExpr => {
  const kept: Array<PushdownExpr> = [];
  for (const item of clauses) {
    switch (item._tag) {
      case "False":
        return pushdownFalse;
      case "True":
        break;
      case "And":
        kept.push(...item.clauses);
        break;
      default:
        kept.push(item);
        break;
    }
  }
  switch (kept.length) {
    case 0:
      return pushdownTrue;
    case 1:
      return kept[0]!;
    default:
      return { _tag: "And", clauses: kept };
  }
};
|
|
233
|
+
|
|
234
|
+
/**
 * Builds the simplest expression equivalent to the disjunction of `clauses`.
 *
 * - Any `True` clause makes the whole disjunction `True`.
 * - `False` clauses are dropped.
 * - The children of a nested `Or` are spliced into the result one level deep.
 *
 * @param clauses - Clauses to OR together.
 * @returns `False` when nothing remains, the clause itself when exactly one
 *          remains, otherwise an `Or` node over the remaining clauses.
 */
const simplifyOr = (clauses: ReadonlyArray<PushdownExpr>): PushdownExpr => {
  const kept: Array<PushdownExpr> = [];
  for (const item of clauses) {
    switch (item._tag) {
      case "True":
        return pushdownTrue;
      case "False":
        break;
      case "Or":
        kept.push(...item.clauses);
        break;
      default:
        kept.push(item);
        break;
    }
  }
  switch (kept.length) {
    case 0:
      return pushdownFalse;
    case 1:
      return kept[0]!;
    default:
      return { _tag: "Or", clauses: kept };
  }
};
|
|
257
|
+
|
|
258
|
+
/**
 * Translates a filter expression into its pushdown form.
 *
 * - A missing expression and `All` map to `True`; `None` maps to `False`.
 * - `And` / `Or` are translated recursively and passed through
 *   `simplifyAnd` / `simplifyOr`.
 * - `AuthorIn` / `HashtagIn` map to `False` when the list is empty; otherwise
 *   duplicates are removed.
 * - `Language` codes are normalized with `normalizeLangs`; if nothing remains
 *   the result is `False`.
 * - `Contains` defaults `caseSensitive` to `false`.
 * - Any other tag maps to `True`, i.e. it contributes no SQL restriction
 *   (presumably such predicates are evaluated after the rows are fetched —
 *   confirm against the query implementation).
 *
 * @param expr - Filter expression, or `undefined` for "no filter".
 * @returns The pushdown expression for `expr`.
 */
const buildPushdown = (expr: FilterExpr | undefined): PushdownExpr => {
  if (!expr) {
    return pushdownTrue;
  }
  switch (expr._tag) {
    case "All":
      return pushdownTrue;
    case "None":
      return pushdownFalse;
    case "And":
      return simplifyAnd([buildPushdown(expr.left), buildPushdown(expr.right)]);
    case "Or":
      return simplifyOr([buildPushdown(expr.left), buildPushdown(expr.right)]);
    case "Author":
      return { _tag: "Author", handle: expr.handle };
    case "AuthorIn":
      return expr.handles.length === 0
        ? pushdownFalse
        : { _tag: "AuthorIn", handles: Array.from(new Set(expr.handles)) };
    case "Hashtag":
      return { _tag: "Hashtag", tag: expr.tag };
    case "HashtagIn":
      return expr.tags.length === 0
        ? pushdownFalse
        : { _tag: "HashtagIn", tags: Array.from(new Set(expr.tags)) };
    case "DateRange":
      return { _tag: "DateRange", start: expr.start, end: expr.end };
    case "IsReply":
      return { _tag: "IsReply" };
    case "IsQuote":
      return { _tag: "IsQuote" };
    case "IsRepost":
      return { _tag: "IsRepost" };
    case "IsOriginal":
      return { _tag: "IsOriginal" };
    case "HasLinks":
      return { _tag: "HasLinks" };
    case "HasMedia":
      return { _tag: "HasMedia" };
    case "HasEmbed":
      return { _tag: "HasEmbed" };
    case "HasImages":
      return { _tag: "HasImages" };
    case "HasVideo":
      return { _tag: "HasVideo" };
    case "Language": {
      // Block scope keeps `langs` local to this case instead of the whole switch.
      // An empty input list normalizes to an empty list, so one check covers both.
      const langs = normalizeLangs(expr.langs);
      return langs.length === 0 ? pushdownFalse : { _tag: "Language", langs };
    }
    case "Engagement":
      // Only copy thresholds that were actually provided.
      return {
        _tag: "Engagement",
        ...(expr.minLikes !== undefined ? { minLikes: expr.minLikes } : {}),
        ...(expr.minReposts !== undefined ? { minReposts: expr.minReposts } : {}),
        ...(expr.minReplies !== undefined ? { minReplies: expr.minReplies } : {})
      };
    case "Contains":
      return {
        _tag: "Contains",
        text: expr.text,
        caseSensitive: expr.caseSensitive ?? false
      };
    default:
      return pushdownTrue;
  }
};
|
|
326
|
+
|
|
327
|
+
/**
 * Checks whether every UTF-16 code unit of a string is in the range 0x00-0x7F.
 *
 * @param value - String to inspect.
 * @returns `true` for the empty string and for strings made only of ASCII
 *          characters; `false` otherwise.
 */
const isAscii = (value: string) => {
  for (let index = 0; index < value.length; index += 1) {
    if (value.charCodeAt(index) > 0x7f) {
      return false;
    }
  }
  return true;
};
|
|
328
|
+
|
|
329
|
+
/**
 * Normalizes a list of language codes.
 *
 * Each entry is trimmed and lowercased; entries that end up empty are dropped
 * and duplicates are removed, keeping first-occurrence order.
 *
 * @param langs - Language codes as provided by the caller.
 * @returns A new array of distinct, non-empty, lowercased codes.
 */
const normalizeLangs = (langs: ReadonlyArray<string>) => {
  const distinct = new Set<string>();
  for (const raw of langs) {
    const code = raw.trim().toLowerCase();
    if (code.length > 0) {
      distinct.add(code);
    }
  }
  return Array.from(distinct);
};
|
|
335
|
+
|
|
336
|
+
/**
 * Renders a pushdown expression as a SQL condition fragment.
 *
 * Column references use the alias `p` for the posts row; hashtag and language
 * predicates use correlated `EXISTS` subqueries on `post_hashtag` and
 * `post_lang`.
 *
 * `undefined` means "no SQL condition produced" for the expression:
 * - `True`, an `Engagement` with no thresholds, and a `Contains` with empty
 *   text yield `undefined`.
 * - A case-insensitive `Contains` with non-ASCII text yields `undefined`
 *   (presumably because SQL `lower()` cannot be relied on beyond ASCII —
 *   confirm).
 * - `And` drops children that yield `undefined` and is itself `undefined`
 *   when none remain.
 * - `Or` is `undefined` as soon as any child yields `undefined`, or when it
 *   has no children.
 *
 * @param sql - SQL client used to build parameterized fragments.
 * @param expr - Pushdown expression to render.
 * @returns The condition fragment, or `undefined` as described above.
 */
const pushdownToSql = (
  sql: SqlClient.SqlClient,
  expr: PushdownExpr
): Fragment | undefined => {
  switch (expr._tag) {
    case "True":
      return undefined;
    case "False":
      return sql`1=0`;
    case "Author":
      return sql`p.author = ${expr.handle}`;
    case "AuthorIn": {
      if (expr.handles.length === 0) {
        return sql`1=0`;
      }
      return sql`p.author IN ${sql.in(expr.handles)}`;
    }
    case "Hashtag":
      return sql`EXISTS (SELECT 1 FROM post_hashtag h WHERE h.uri = p.uri AND h.tag = ${expr.tag})`;
    case "HashtagIn": {
      if (expr.tags.length === 0) {
        return sql`1=0`;
      }
      return sql`EXISTS (SELECT 1 FROM post_hashtag h WHERE h.uri = p.uri AND h.tag IN ${sql.in(expr.tags)})`;
    }
    case "DateRange": {
      const lowerBound = toIso(expr.start);
      const upperBound = toIso(expr.end);
      return sql`p.created_at >= ${lowerBound} AND p.created_at <= ${upperBound}`;
    }
    case "IsReply":
      return sql`p.is_reply = 1`;
    case "IsQuote":
      return sql`p.is_quote = 1`;
    case "IsRepost":
      return sql`p.is_repost = 1`;
    case "IsOriginal":
      return sql`p.is_original = 1`;
    case "HasLinks":
      return sql`p.has_links = 1`;
    case "HasMedia":
      return sql`p.has_media = 1`;
    case "HasEmbed":
      return sql`p.has_embed = 1`;
    case "HasImages":
      return sql`p.has_images = 1`;
    case "HasVideo":
      return sql`p.has_video = 1`;
    case "Language": {
      if (expr.langs.length === 0) {
        return sql`1=0`;
      }
      // Match either a row in post_lang or the post's own lang column.
      return sql`(
            EXISTS (
              SELECT 1 FROM post_lang l
              WHERE l.uri = p.uri AND l.lang IN ${sql.in(expr.langs)}
            )
            OR (
              p.lang IS NOT NULL
              AND lower(p.lang) IN ${sql.in(expr.langs)}
            )
          )`;
    }
    case "Engagement": {
      const thresholds: Array<Fragment> = [];
      if (expr.minLikes !== undefined) {
        thresholds.push(sql`p.like_count >= ${expr.minLikes}`);
      }
      if (expr.minReposts !== undefined) {
        thresholds.push(sql`p.repost_count >= ${expr.minReposts}`);
      }
      if (expr.minReplies !== undefined) {
        thresholds.push(sql`p.reply_count >= ${expr.minReplies}`);
      }
      return thresholds.length === 0 ? undefined : sql.and(thresholds);
    }
    case "Contains": {
      const needle = expr.text;
      if (needle.length === 0) {
        return undefined;
      }
      if (expr.caseSensitive) {
        return sql`instr(p.text, ${needle}) > 0`;
      }
      if (!isAscii(needle)) {
        return undefined;
      }
      return sql`instr(lower(p.text), lower(${needle})) > 0`;
    }
    case "And": {
      const rendered: Array<Fragment> = [];
      for (const child of expr.clauses) {
        const fragment = pushdownToSql(sql, child);
        if (fragment !== undefined) {
          rendered.push(fragment);
        }
      }
      return rendered.length === 0 ? undefined : sql.and(rendered);
    }
    case "Or": {
      const rendered: Array<Fragment> = [];
      for (const child of expr.clauses) {
        const fragment = pushdownToSql(sql, child);
        if (!fragment) {
          // One unrenderable branch means the whole OR cannot be expressed.
          return undefined;
        }
        rendered.push(fragment);
      }
      return rendered.length === 0 ? undefined : sql.or(rendered);
    }
  }
};
|
|
447
|
+
|
|
448
|
+
/**
 * Applies a `PostUpsert` event by handing its post to `upsertPost`.
 *
 * @param sql - SQL client to run against.
 * @param event - The upsert event carrying the post.
 * @returns Whatever `upsertPost` returns for this client and post.
 */
const applyUpsert = (
  sql: SqlClient.SqlClient,
  event: Extract<PostEvent, { _tag: "PostUpsert" }>
) => {
  const { post } = event;
  return upsertPost(sql, post);
};
|
|
452
|
+
|
|
453
|
+
/**
 * Applies a `PostDelete` event by handing its URI to `deletePost`.
 *
 * @param sql - SQL client to run against.
 * @param event - The delete event carrying the post URI.
 * @returns Whatever `deletePost` returns for this client and URI.
 */
const applyDelete = (
  sql: SqlClient.SqlClient,
  event: Extract<PostEvent, { _tag: "PostDelete" }>
) => {
  const { uri } = event;
  return deletePost(sql, uri);
};
|
|
457
|
+
|
|
458
|
+
/**
|
|
459
|
+
* StoreIndex Service - Effect Tag and Layer for post indexing
|
|
460
|
+
*
|
|
461
|
+
* The StoreIndex service provides a complete indexing solution for posts within
|
|
462
|
+
* a store. It maintains SQLite tables synchronized with the event log and offers
|
|
463
|
+
* various querying capabilities including filtering, full-text search, and
|
|
464
|
+
* streaming access to all indexed posts.
|
|
465
|
+
*
|
|
466
|
+
* ## Usage
|
|
467
|
+
*
|
|
468
|
+
* ```typescript
|
|
469
|
+
* // Apply a single event to the index
|
|
470
|
+
* yield* StoreIndex.apply(store, eventRecord);
|
|
471
|
+
*
|
|
472
|
+
* // Query posts with filters
|
|
473
|
+
* const posts = yield* StoreIndex.query(store, {
|
|
474
|
+
* filter: { _tag: "Hashtag", tag: "tech" },
|
|
475
|
+
* order: "desc"
|
|
476
|
+
* }).pipe(Stream.runCollect);
|
|
477
|
+
*
|
|
478
|
+
* // Search posts using full-text search
|
|
479
|
+
* const results = yield* StoreIndex.searchPosts(store, {
|
|
480
|
+
* query: "javascript tutorial",
|
|
481
|
+
* limit: 25
|
|
482
|
+
* });
|
|
483
|
+
*
|
|
484
|
+
* // Get checkpoint for incremental processing
|
|
485
|
+
* const checkpoint = yield* StoreIndex.loadCheckpoint(store, "primary");
|
|
486
|
+
* ```
|
|
487
|
+
*
|
|
488
|
+
* ## Automatic Bootstrap
|
|
489
|
+
*
|
|
490
|
+
* On first access to any store, the service automatically checks if the index
|
|
491
|
+
* needs to be bootstrapped. If the posts table is empty but events exist in the
|
|
492
|
+
* event log, a rebuild is triggered automatically.
|
|
493
|
+
*
|
|
494
|
+
* ## Checkpoint System
|
|
495
|
+
*
|
|
496
|
+
* Checkpoints track indexing progress, storing the last processed event sequence number and
|
|
497
|
+
* total event count. This enables:
|
|
498
|
+
* - Resuming interrupted rebuilds without reprocessing
|
|
499
|
+
* - Incremental updates (process only new events since last checkpoint)
|
|
500
|
+
* - Multiple index versions (tracked by index name)
|
|
501
|
+
*/
|
|
502
|
+
export class StoreIndex extends Context.Tag("@skygent/StoreIndex")<
|
|
503
|
+
StoreIndex,
|
|
504
|
+
{
|
|
505
|
+
/**
|
|
506
|
+
* Apply a single post event to the index
|
|
507
|
+
*
|
|
508
|
+
* Processes a PostEventRecord and updates the index accordingly:
|
|
509
|
+
* - PostUpsert: Inserts or updates the post with all metadata
|
|
510
|
+
* - PostDelete: Removes the post and related data from index
|
|
511
|
+
*
|
|
512
|
+
* @param store - Store reference to apply the event to
|
|
513
|
+
* @param record - The post event record containing the event to apply
|
|
514
|
+
* @returns Effect that completes when the index has been updated
|
|
515
|
+
*/
|
|
516
|
+
readonly apply: (
|
|
517
|
+
store: StoreRef,
|
|
518
|
+
record: PostEventRecord
|
|
519
|
+
) => Effect.Effect<void, StoreIndexError>;
|
|
520
|
+
|
|
521
|
+
/**
|
|
522
|
+
* Get all post URIs created on a specific date
|
|
523
|
+
*
|
|
524
|
+
* Returns posts ordered by creation time (ascending). The date should be
|
|
525
|
+
* in ISO date format (YYYY-MM-DD).
|
|
526
|
+
*
|
|
527
|
+
* @param store - Store reference to query
|
|
528
|
+
* @param date - ISO date string (YYYY-MM-DD)
|
|
529
|
+
* @returns Effect containing array of post URIs
|
|
530
|
+
*/
|
|
531
|
+
readonly getByDate: (
|
|
532
|
+
store: StoreRef,
|
|
533
|
+
date: string
|
|
534
|
+
) => Effect.Effect<ReadonlyArray<PostUri>, StoreIndexError>;
|
|
535
|
+
|
|
536
|
+
/**
|
|
537
|
+
* Get all post URIs tagged with a specific hashtag
|
|
538
|
+
*
|
|
539
|
+
* Returns posts ordered by URI (ascending). Case-sensitive exact match.
|
|
540
|
+
*
|
|
541
|
+
* @param store - Store reference to query
|
|
542
|
+
* @param tag - Hashtag to search for (without # prefix)
|
|
543
|
+
* @returns Effect containing array of post URIs
|
|
544
|
+
*/
|
|
545
|
+
readonly getByHashtag: (
|
|
546
|
+
store: StoreRef,
|
|
547
|
+
tag: string
|
|
548
|
+
) => Effect.Effect<ReadonlyArray<PostUri>, StoreIndexError>;
|
|
549
|
+
|
|
550
|
+
/**
|
|
551
|
+
* Retrieve a single post by URI
|
|
552
|
+
*
|
|
553
|
+
* Fetches the post JSON from the database and decodes it into a Post object.
|
|
554
|
+
* Returns None if the post doesn't exist in the index.
|
|
555
|
+
*
|
|
556
|
+
* @param store - Store reference to query
|
|
557
|
+
* @param uri - Post URI to retrieve
|
|
558
|
+
* @returns Effect containing Option of Post (Some if found, None if not)
|
|
559
|
+
*/
|
|
560
|
+
readonly getPost: (
|
|
561
|
+
store: StoreRef,
|
|
562
|
+
uri: PostUri
|
|
563
|
+
) => Effect.Effect<Option.Option<Post>, StoreIndexError>;
|
|
564
|
+
|
|
565
|
+
/**
|
|
566
|
+
* Check if a post URI exists in the index
|
|
567
|
+
*
|
|
568
|
+
* Efficient existence check using a LIMIT 1 query.
|
|
569
|
+
*
|
|
570
|
+
* @param store - Store reference to query
|
|
571
|
+
* @param uri - Post URI to check
|
|
572
|
+
* @returns Effect containing true if the URI exists, false otherwise
|
|
573
|
+
*/
|
|
574
|
+
readonly hasUri: (
|
|
575
|
+
store: StoreRef,
|
|
576
|
+
uri: PostUri
|
|
577
|
+
) => Effect.Effect<boolean, StoreIndexError>;
|
|
578
|
+
|
|
579
|
+
/**
|
|
580
|
+
* Clear all indexed data for a store
|
|
581
|
+
*
|
|
582
|
+
* Removes all posts, hashtags, languages, and checkpoints from the index.
|
|
583
|
+
* This operation is performed in a transaction for consistency.
|
|
584
|
+
*
|
|
585
|
+
* @param store - Store reference to clear
|
|
586
|
+
* @returns Effect that completes when all data has been cleared
|
|
587
|
+
*/
|
|
588
|
+
readonly clear: (store: StoreRef) => Effect.Effect<void, StoreIndexError>;
|
|
589
|
+
|
|
590
|
+
/**
|
|
591
|
+
* Load a checkpoint for an index
|
|
592
|
+
*
|
|
593
|
+
* Retrieves the stored checkpoint for a given index name, containing
|
|
594
|
+
* the last processed event sequence number and event count. Returns None if no
|
|
595
|
+
* checkpoint exists.
|
|
596
|
+
*
|
|
597
|
+
* @param store - Store reference to load from
|
|
598
|
+
* @param index - Name of the index to load checkpoint for
|
|
599
|
+
* @returns Effect containing Option of IndexCheckpoint
|
|
600
|
+
*/
|
|
601
|
+
readonly loadCheckpoint: (
|
|
602
|
+
store: StoreRef,
|
|
603
|
+
index: string
|
|
604
|
+
) => Effect.Effect<Option.Option<IndexCheckpoint>, StoreIndexError>;
|
|
605
|
+
|
|
606
|
+
/**
|
|
607
|
+
* Save a checkpoint for an index
|
|
608
|
+
*
|
|
609
|
+
* Persists the checkpoint to the database, overwriting any existing
|
|
610
|
+
* checkpoint for the same index name.
|
|
611
|
+
*
|
|
612
|
+
* @param store - Store reference to save to
|
|
613
|
+
* @param checkpoint - Checkpoint data to persist
|
|
614
|
+
* @returns Effect that completes when checkpoint has been saved
|
|
615
|
+
*/
|
|
616
|
+
readonly saveCheckpoint: (
|
|
617
|
+
store: StoreRef,
|
|
618
|
+
checkpoint: IndexCheckpoint
|
|
619
|
+
) => Effect.Effect<void, StoreIndexError>;
|
|
620
|
+
|
|
621
|
+
/**
|
|
622
|
+
* Query posts with filtering and pagination
|
|
623
|
+
*
|
|
624
|
+
* Executes a query against the index with optional:
|
|
625
|
+
* - Filter expressions (converted to SQL via pushdown)
|
|
626
|
+
* - Date range constraints
|
|
627
|
+
* - Scan limits (maximum posts to examine)
|
|
628
|
+
* - Sort order (ascending/descending)
|
|
629
|
+
*
|
|
630
|
+
* Results are streamed using keyset pagination for efficient large result sets.
|
|
631
|
+
*
|
|
632
|
+
* @param store - Store reference to query
|
|
633
|
+
* @param query - Query configuration including filter, range, limit, and order
|
|
634
|
+
* @returns Stream of Posts matching the query criteria
|
|
635
|
+
*/
|
|
636
|
+
readonly query: (
|
|
637
|
+
store: StoreRef,
|
|
638
|
+
query: StoreQuery
|
|
639
|
+
) => Stream.Stream<Post, StoreIndexError>;
|
|
640
|
+
|
|
641
|
+
/**
|
|
642
|
+
* Search posts using full-text search (FTS5)
|
|
643
|
+
*
|
|
644
|
+
* Performs a full-text search across post content using SQLite FTS5.
|
|
645
|
+
* Supports different sort orders: relevance (BM25 ranking), newest, or oldest.
|
|
646
|
+
* Results are paginated using offset-based cursors.
|
|
647
|
+
*
|
|
648
|
+
* @param store - Store reference to search
|
|
649
|
+
* @param input - Search configuration
|
|
650
|
+
* @param input.query - Search query string (FTS5 syntax supported; plain text is sanitized)
|
|
651
|
+
* @param input.limit - Maximum results to return (default: 25)
|
|
652
|
+
* @param input.cursor - Offset for pagination (default: 0)
|
|
653
|
+
* @param input.sort - Sort order: "relevance" | "newest" | "oldest"
|
|
654
|
+
* @returns Effect containing search results and optional next cursor
|
|
655
|
+
*/
|
|
656
|
+
readonly searchPosts: (
|
|
657
|
+
store: StoreRef,
|
|
658
|
+
input: {
|
|
659
|
+
readonly query: string;
|
|
660
|
+
readonly limit?: number;
|
|
661
|
+
readonly cursor?: number;
|
|
662
|
+
readonly sort?: SearchSort;
|
|
663
|
+
}
|
|
664
|
+
) => Effect.Effect<{ readonly posts: ReadonlyArray<Post>; readonly cursor?: number }, StoreIndexError>;
|
|
665
|
+
|
|
666
|
+
/**
|
|
667
|
+
* Stream all index entries
|
|
668
|
+
*
|
|
669
|
+
* Returns a stream of PostIndexEntry objects containing basic metadata
|
|
670
|
+
* (URI, creation date, author, hashtags) for all posts in the index.
|
|
671
|
+
* Results are paginated internally and streamed as a continuous flow.
|
|
672
|
+
*
|
|
673
|
+
* @param store - Store reference to stream from
|
|
674
|
+
* @returns Stream of PostIndexEntry objects
|
|
675
|
+
*/
|
|
676
|
+
readonly entries: (store: StoreRef) => Stream.Stream<PostIndexEntry, StoreIndexError>;
|
|
677
|
+
|
|
678
|
+
/**
|
|
679
|
+
* Count total posts in the index
|
|
680
|
+
*
|
|
681
|
+
* Returns the total number of posts currently indexed in the store.
|
|
682
|
+
*
|
|
683
|
+
* @param store - Store reference to count
|
|
684
|
+
* @returns Effect containing the post count
|
|
685
|
+
*/
|
|
686
|
+
readonly count: (store: StoreRef) => Effect.Effect<number, StoreIndexError>;
|
|
687
|
+
|
|
688
|
+
/**
|
|
689
|
+
* Rebuild the index from the event log
|
|
690
|
+
*
|
|
691
|
+
* Performs a full or incremental rebuild of the index:
|
|
692
|
+
* - If a checkpoint exists, only processes events after the checkpoint
|
|
693
|
+
* - If no checkpoint exists, processes all events from the beginning
|
|
694
|
+
* - Updates the checkpoint upon completion
|
|
695
|
+
* - Runs ANALYZE and PRAGMA optimize for query performance
|
|
696
|
+
*
|
|
697
|
+
* Events are processed in batches within transactions for efficiency.
|
|
698
|
+
*
|
|
699
|
+
* @param store - Store reference to rebuild
|
|
700
|
+
* @returns Effect that completes when rebuild is finished
|
|
701
|
+
*/
|
|
702
|
+
readonly rebuild: (
|
|
703
|
+
store: StoreRef
|
|
704
|
+
) => Effect.Effect<void, StoreIndexError>;
|
|
705
|
+
}
|
|
706
|
+
>() {
|
|
707
|
+
static readonly layer = Layer.scoped(
|
|
708
|
+
StoreIndex,
|
|
709
|
+
Effect.gen(function* () {
|
|
710
|
+
const eventLog = yield* StoreEventLog;
|
|
711
|
+
const storeDb = yield* StoreDb;
|
|
712
|
+
const bootstrapped = yield* Ref.make(new Set<string>());
|
|
713
|
+
/**
 * Runs `run` against the SQL client for `store`, making sure the store has
 * gone through `bootstrapStore` first.
 *
 * The first caller for a given `store.name` claims the bootstrap slot in the
 * `bootstrapped` ref and executes `bootstrapStore`; callers that find the name
 * already present skip straight to `run`. If the bootstrap fails or is
 * interrupted, the claim is released again so that a later call retries it
 * instead of treating a half-initialised store as ready.
 *
 * @param store - Store whose client should be used
 * @param message - Message passed to `toStoreIndexError` for any failure
 * @param run - Effect factory executed with the store's SQL client
 * @returns The result of `run`, with failures mapped through `toStoreIndexError(message)`
 */
const withClient = <A, E>(
  store: StoreRef,
  message: string,
  run: (client: SqlClient.SqlClient) => Effect.Effect<A, E>
) =>
  storeDb
    .withClient(store, (client) =>
      Ref.modify(bootstrapped, (state) => {
        if (state.has(store.name)) {
          return [Effect.void, state] as const;
        }
        const next = new Set(state);
        next.add(store.name);
        // Undo the claim when bootstrapping does not complete successfully.
        const releaseClaim = Ref.update(bootstrapped, (current) => {
          const remaining = new Set(current);
          remaining.delete(store.name);
          return remaining;
        });
        return [
          bootstrapStore(store, client).pipe(Effect.onError(() => releaseClaim)),
          next
        ] as const;
      }).pipe(Effect.flatten, Effect.andThen(run(client)))
    )
    .pipe(Effect.mapError(toStoreIndexError(message)));
|
|
730
|
+
|
|
731
|
+
/**
 * Reads the checkpoint row for `index` from `index_checkpoints` using an
 * already-acquired client.
 *
 * @returns `Option.none()` when no row exists, otherwise the row passed
 *          through `decodeCheckpointRow` and wrapped in `Option.some`
 */
const loadCheckpointWithClient = (client: SqlClient.SqlClient, index: string) => {
  const findCheckpoint = SqlSchema.findOne({
    Request: Schema.String,
    Result: checkpointRow,
    execute: (indexKey) =>
      client`SELECT index_name, version, last_event_seq, event_count, updated_at
        FROM index_checkpoints
        WHERE index_name = ${indexKey}`
  });

  return Effect.flatMap(findCheckpoint(index), (found) =>
    Option.isNone(found)
      ? Effect.succeed(Option.none())
      : Effect.map(decodeCheckpointRow(found.value), Option.some)
  );
};
|
|
750
|
+
|
|
751
|
+
/**
 * Inserts the checkpoint into `index_checkpoints`, or overwrites the existing
 * row with the same `index_name`. The statement result is discarded.
 */
const saveCheckpointWithClient = (client: SqlClient.SqlClient, checkpoint: IndexCheckpoint) => {
  const updatedAtIso = toIso(checkpoint.updatedAt);
  const upsert = client`INSERT INTO index_checkpoints (index_name, version, last_event_seq, event_count, updated_at)
    VALUES (${checkpoint.index}, ${checkpoint.version}, ${checkpoint.lastEventSeq}, ${checkpoint.eventCount}, ${updatedAtIso})
    ON CONFLICT(index_name) DO UPDATE SET
    version = excluded.version,
    last_event_seq = excluded.last_event_seq,
    event_count = excluded.event_count,
    updated_at = excluded.updated_at`;
  return Effect.asVoid(upsert);
};
|
|
761
|
+
|
|
762
|
+
/**
 * Applies one event record to the index tables: `PostUpsert` events go to
 * `applyUpsert`, `PostDelete` events go to `applyDelete`. Any other tag is
 * ignored.
 */
const applyWithClient = (client: SqlClient.SqlClient, record: PostEventRecord) =>
  Effect.gen(function* () {
    const event = record.event;
    switch (event._tag) {
      case "PostUpsert":
        yield* applyUpsert(client, event);
        break;
      case "PostDelete":
        yield* applyDelete(client, event);
        break;
    }
  });
|
|
772
|
+
|
|
773
|
+
/**
 * Replays events from the event log into the index using an already-acquired
 * client.
 *
 * Events with a sequence greater than the stored checkpoint (or all events
 * when there is no checkpoint) are applied in batches of `entryPageSize`,
 * one transaction per batch. When at least one event was applied, the
 * checkpoint is saved and `ANALYZE` / `PRAGMA optimize` are run; otherwise
 * nothing is written.
 */
const rebuildWithClient = (store: StoreRef, client: SqlClient.SqlClient) =>
  Effect.gen(function* () {
    const checkpoint = yield* loadCheckpointWithClient(client, indexName);
    const resumeAfter = Option.map(checkpoint, (value) => value.lastEventSeq);

    // Keep only events strictly newer than the checkpointed sequence.
    const pending = Stream.filter(
      eventLog.stream(store),
      (entry) => Option.isNone(resumeAfter) || entry.seq > resumeAfter.value
    );

    const totals = yield* pending.pipe(
      Stream.grouped(entryPageSize),
      Stream.runFoldEffect(
        { count: 0, lastSeq: Option.none<EventSeq>() },
        (acc, batch) =>
          client.withTransaction(
            Effect.gen(function* () {
              yield* Effect.forEach(
                batch,
                (item) => applyWithClient(client, item.record),
                { discard: true }
              );
              const applied = Chunk.size(batch);
              return {
                count: acc.count + applied,
                lastSeq:
                  applied > 0
                    ? Option.some(Chunk.unsafeLast(batch).seq)
                    : acc.lastSeq
              };
            })
          )
      )
    );

    // Nothing new was applied: leave the checkpoint untouched.
    if (totals.count === 0 || Option.isNone(totals.lastSeq)) {
      return;
    }

    const now = yield* Clock.currentTimeMillis;
    const updatedAt = yield* Schema.decodeUnknown(Timestamp)(new Date(now).toISOString());
    const previousCount = Option.match(checkpoint, {
      onNone: () => 0,
      onSome: (value) => value.eventCount
    });

    const nextCheckpoint = IndexCheckpoint.make({
      index: indexName,
      version: 1,
      lastEventSeq: totals.lastSeq.value,
      eventCount: previousCount + totals.count,
      updatedAt
    });

    yield* saveCheckpointWithClient(client, nextCheckpoint);
    yield* client`ANALYZE`;
    yield* client`PRAGMA optimize`;
  });
|
|
835
|
+
|
|
836
|
+
/**
 * Populates an empty index from the event log.
 *
 * Does nothing when `posts` already contains rows or when the event log
 * reports no last event sequence; otherwise runs `rebuildWithClient`.
 */
const bootstrapStore = (store: StoreRef, client: SqlClient.SqlClient) =>
  Effect.gen(function* () {
    const [firstRow] = yield* client`SELECT COUNT(*) as count FROM posts`;
    if (Number(firstRow?.count ?? 0) > 0) {
      return;
    }

    const latestSeq = yield* eventLog.getLastEventSeq(store);
    if (Option.isSome(latestSeq)) {
      yield* rebuildWithClient(store, client);
    }
  });
|
|
851
|
+
|
|
852
|
+
/**
 * Applies a single event record to the store's index inside one transaction.
 */
const apply = Effect.fn("StoreIndex.apply")(
  (store: StoreRef, record: PostEventRecord) => {
    const applyInTransaction = (client: SqlClient.SqlClient) =>
      client.withTransaction(applyWithClient(client, record));
    return withClient(store, "StoreIndex.apply failed", applyInTransaction);
  }
);
|
|
858
|
+
|
|
859
|
+
/**
 * Returns the URIs of all posts whose `created_date` equals `date`, ordered
 * by `created_at` ascending.
 */
const getByDate = Effect.fn("StoreIndex.getByDate")(
  (store: StoreRef, date: string) =>
    withClient(store, "StoreIndex.getByDate failed", (client) => {
      const urisForDate = SqlSchema.findAll({
        Request: Schema.String,
        Result: postUriRow,
        execute: (day) =>
          client`SELECT uri FROM posts WHERE created_date = ${day} ORDER BY created_at ASC`
      });

      return Effect.map(urisForDate(date), (rows) => rows.map(({ uri }) => uri));
    })
);
|
|
874
|
+
|
|
875
|
+
/**
 * Returns the URIs recorded in `post_hashtag` for `tag`, ordered by URI
 * ascending.
 */
const getByHashtag = Effect.fn("StoreIndex.getByHashtag")(
  (store: StoreRef, tag: string) =>
    withClient(store, "StoreIndex.getByHashtag failed", (client) => {
      const urisForTag = SqlSchema.findAll({
        Request: Schema.String,
        Result: postUriRow,
        execute: (hashtag) =>
          client`SELECT uri FROM post_hashtag WHERE tag = ${hashtag} ORDER BY uri ASC`
      });

      return Effect.map(urisForTag(tag), (rows) => rows.map(({ uri }) => uri));
    })
);
|
|
890
|
+
|
|
891
|
+
/**
 * Looks up a single post by URI.
 *
 * @returns `Option.none()` when no row matches, otherwise the stored
 *          `post_json` decoded via `decodePostJson` and wrapped in `Option.some`
 */
const getPost = Effect.fn("StoreIndex.getPost")(
  (store: StoreRef, uri: PostUri) =>
    withClient(store, "StoreIndex.getPost failed", (client) => {
      const lookup = SqlSchema.findOne({
        Request: PostUri,
        Result: postJsonRow,
        execute: (postUri) =>
          client`SELECT post_json FROM posts WHERE uri = ${postUri}`
      });

      return Effect.flatMap(lookup(uri), (found) =>
        Option.isNone(found)
          ? Effect.succeed(Option.none())
          : Effect.map(decodePostJson(found.value.post_json), Option.some)
      );
    })
);
|
|
912
|
+
|
|
913
|
+
/**
 * Reports whether a post with the given URI exists in the `posts` table.
 */
const hasUri = Effect.fn("StoreIndex.hasUri")((store: StoreRef, uri: PostUri) =>
  withClient(store, "StoreIndex.hasUri failed", (client) => {
    const probe = client`SELECT 1 FROM posts WHERE uri = ${uri} LIMIT 1`;
    return Effect.map(probe, (rows) => rows.length !== 0);
  })
);
|
|
920
|
+
|
|
921
|
+
/**
 * Empties the index: deletes every row from `post_hashtag`, `post_lang`,
 * `posts` and `index_checkpoints`, in that order, within one transaction.
 */
const clear = Effect.fn("StoreIndex.clear")((store: StoreRef) =>
  withClient(store, "StoreIndex.clear failed", (client) =>
    client.withTransaction(
      client`DELETE FROM post_hashtag`.pipe(
        Effect.andThen(client`DELETE FROM post_lang`),
        Effect.andThen(client`DELETE FROM posts`),
        Effect.andThen(client`DELETE FROM index_checkpoints`),
        Effect.asVoid
      )
    )
  )
);
|
|
933
|
+
|
|
934
|
+
/**
 * Service-level wrapper that loads the checkpoint for `index` from the given
 * store via `loadCheckpointWithClient`.
 */
const loadCheckpoint = Effect.fn("StoreIndex.loadCheckpoint")(
  (store: StoreRef, index: string) => {
    const load = (client: SqlClient.SqlClient) =>
      loadCheckpointWithClient(client, index);
    return withClient(store, "StoreIndex.loadCheckpoint failed", load);
  }
);
|
|
940
|
+
|
|
941
|
+
/**
 * Service-level wrapper that persists `checkpoint` in the given store via
 * `saveCheckpointWithClient`.
 */
const saveCheckpoint = Effect.fn("StoreIndex.saveCheckpoint")(
  (store: StoreRef, checkpoint: IndexCheckpoint) => {
    const save = (client: SqlClient.SqlClient) =>
      saveCheckpointWithClient(client, checkpoint);
    return withClient(store, "StoreIndex.saveCheckpoint failed", save);
  }
);
|
|
947
|
+
|
|
948
|
+
/**
 * Streams posts from the index one page at a time.
 *
 * Pages are fetched with keyset pagination on `(created_at, uri)` in the
 * requested order. Optional constraints come from `q.range` (inclusive
 * `created_at` bounds) and from the SQL produced by
 * `pushdownToSql(buildPushdown(q.filter))`. When `q.scanLimit` is set, no
 * more than that many rows are fetched in total.
 */
const query = (store: StoreRef, q: StoreQuery) => {
  const rangeStart = q.range ? toIso(q.range.start) : undefined;
  const rangeEnd = q.range ? toIso(q.range.end) : undefined;
  const scanLimit = q.scanLimit;
  const direction = q.order === "desc" ? "DESC" : "ASC";
  const pushdownExpr = buildPushdown(q.filter);

  const firstPage: QueryCursorState = {
    lastCreatedAt: undefined,
    lastUri: undefined,
    fetched: 0
  };

  return Stream.paginateChunkEffect(firstPage, (cursor) =>
    withClient(store, "StoreIndex.query failed", (client) =>
      Effect.gen(function* () {
        const { lastCreatedAt, lastUri, fetched } = cursor;

        // Stop before touching the database once the scan budget is spent.
        if (scanLimit !== undefined && fetched >= scanLimit) {
          return [Chunk.empty<Post>(), Option.none<QueryCursorState>()] as const;
        }
        const pageSize =
          scanLimit === undefined
            ? entryPageSize
            : Math.min(entryPageSize, scanLimit - fetched);

        const rangeClause = !(rangeStart && rangeEnd)
          ? undefined
          : client`p.created_at >= ${rangeStart} AND p.created_at <= ${rangeEnd}`;

        // Resume strictly after (or before, when descending) the last row seen.
        const keysetClause = !(lastCreatedAt && lastUri)
          ? undefined
          : direction === "ASC"
            ? client`(p.created_at > ${lastCreatedAt} OR (p.created_at = ${lastCreatedAt} AND p.uri > ${lastUri}))`
            : client`(p.created_at < ${lastCreatedAt} OR (p.created_at = ${lastCreatedAt} AND p.uri < ${lastUri}))`;

        const pushdownClause = pushdownToSql(client, pushdownExpr);

        const where = client.and([
          ...(rangeClause ? [rangeClause] : []),
          ...(keysetClause ? [keysetClause] : []),
          ...(pushdownClause ? [pushdownClause] : [])
        ]);

        const rows = yield* client`SELECT post_json FROM posts p
          WHERE ${where}
          ORDER BY p.created_at ${client.unsafe(direction)}, p.uri ${client.unsafe(direction)}
          LIMIT ${pageSize}`;

        const decodedRows = yield* Schema.decodeUnknown(Schema.Array(postJsonRow))(rows).pipe(
          Effect.mapError(toStoreIndexError("StoreIndex.query decode failed"))
        );

        const posts = yield* Effect.forEach(
          decodedRows,
          (row) => decodePostJson(row.post_json),
          { discard: false }
        );

        const fetchedSoFar = fetched + posts.length;
        const shortPage = posts.length < pageSize;
        const budgetSpent = scanLimit !== undefined && fetchedSoFar >= scanLimit;
        if (shortPage || budgetSpent) {
          return [Chunk.fromIterable(posts), Option.none<QueryCursorState>()] as const;
        }

        const tail = posts.length > 0 ? posts[posts.length - 1] : undefined;
        const nextState = Option.some({
          lastCreatedAt: tail ? toIso(tail.createdAt) : lastCreatedAt,
          lastUri: tail ? tail.uri : lastUri,
          fetched: fetchedSoFar
        });

        return [Chunk.fromIterable(posts), nextState] as const;
      })
    )
  );
};
|
|
1026
|
+
|
|
1027
|
+
/**
 * Full-text search over `posts_fts`, paginated with LIMIT/OFFSET.
 *
 * - A blank query returns an empty result without touching the database.
 * - When `hasFtsSyntax(rawQuery)` is true the trimmed query is sent to
 *   `MATCH` as-is; if that statement fails, the search is retried once with
 *   the output of `buildLiteralFtsQuery`. Otherwise the literal form is used
 *   directly.
 * - `limit` defaults to 25 and `cursor` (a row offset) defaults to 0 when
 *   missing or not positive.
 * - Every sort mode ends with `p.created_at, p.uri` so that rows with equal
 *   relevance scores keep a fixed order from page to page; without it,
 *   OFFSET paging could repeat or skip tied rows.
 *
 * @returns The decoded posts, plus `cursor` (the next offset) when the page
 *          was full
 */
const searchPosts = Effect.fn("StoreIndex.searchPosts")(
  (store: StoreRef, input: { readonly query: string; readonly limit?: number; readonly cursor?: number; readonly sort?: SearchSort }) =>
    withClient(store, "StoreIndex.searchPosts failed", (client) =>
      Effect.gen(function* () {
        const rawQuery = input.query.trim();
        if (rawQuery.length === 0) {
          return { posts: [] as ReadonlyArray<Post> };
        }
        const literalQuery = buildLiteralFtsQuery(rawQuery);
        const useRaw = hasFtsSyntax(rawQuery);
        const query = useRaw ? rawQuery : literalQuery;
        const limit = input.limit && input.limit > 0 ? input.limit : 25;
        const offset = input.cursor && input.cursor > 0 ? input.cursor : 0;
        const sort = input.sort ?? "relevance";
        // Fixed set of ORDER BY fragments; never built from user input.
        const orderBy =
          sort === "relevance"
            ? "bm25(posts_fts), p.created_at DESC, p.uri DESC"
            : sort === "oldest"
              ? "p.created_at ASC, p.uri ASC"
              : "p.created_at DESC, p.uri DESC";

        const runSearch = (ftsQuery: string) =>
          client`SELECT p.post_json FROM posts_fts
            JOIN posts p ON p.rowid = posts_fts.rowid
            WHERE posts_fts MATCH ${ftsQuery}
            ORDER BY ${client.unsafe(orderBy)}
            LIMIT ${limit} OFFSET ${offset}`;

        // Fall back to the literal query only when the raw form was attempted.
        const rows = yield* runSearch(query).pipe(
          Effect.catchAll((error) =>
            useRaw && literalQuery !== query
              ? runSearch(literalQuery)
              : Effect.fail(error)
          )
        );

        const decoded = yield* Schema.decodeUnknown(
          Schema.Array(postJsonRow)
        )(rows).pipe(
          Effect.mapError(toStoreIndexError("StoreIndex.searchPosts decode failed"))
        );

        const posts = yield* Effect.forEach(
          decoded,
          (row) => decodePostJson(row.post_json),
          { discard: false }
        );

        // A short page means there is nothing further to fetch.
        const nextCursor = posts.length < limit ? undefined : offset + posts.length;

        return nextCursor !== undefined
          ? { posts, cursor: nextCursor }
          : { posts };
      })
    )
);
|
|
1083
|
+
|
|
1084
|
+
/**
 * Streams every index entry (uri, created_date, author, comma-joined
 * hashtags) in pages of `entryPageSize` rows using LIMIT/OFFSET.
 *
 * Rows are ordered by `created_at` and then by `uri`. The `uri` tie-breaker
 * gives posts that share a timestamp a fixed position, so consecutive pages
 * cannot duplicate or skip them.
 *
 * @param store - Store reference to stream from
 * @returns Stream of entries decoded through `decodeEntryRow`
 */
const entries = (store: StoreRef) =>
  Stream.paginateChunkEffect(0, (offset) =>
    withClient(store, "StoreIndex.entries failed", (client) =>
      Effect.gen(function* () {
        const rows = yield* client`SELECT
            p.uri as uri,
            p.created_date as created_date,
            p.author as author,
            group_concat(h.tag) as hashtags
          FROM posts p
          LEFT JOIN post_hashtag h ON p.uri = h.uri
          GROUP BY p.uri
          ORDER BY p.created_at ASC, p.uri ASC
          LIMIT ${entryPageSize} OFFSET ${offset}`;

        const decoded = yield* Schema.decodeUnknown(
          Schema.Array(postEntryRow)
        )(rows).pipe(
          Effect.mapError(toStoreIndexError("StoreIndex.entries decode failed"))
        );

        const entries = yield* Effect.forEach(
          decoded,
          (row) => decodeEntryRow(row),
          { discard: false }
        );

        // A short page is the last one; otherwise advance by a full page.
        const next =
          entries.length < entryPageSize
            ? Option.none<number>()
            : Option.some(offset + entryPageSize);

        return [Chunk.fromIterable(entries), next] as const;
      })
    )
  );
|
|
1120
|
+
|
|
1121
|
+
/**
 * Returns the number of rows in `posts`, or 0 when the query yields no row.
 */
const count = Effect.fn("StoreIndex.count")((store: StoreRef) =>
  withClient(store, "StoreIndex.count failed", (client) => {
    const tally = client`SELECT COUNT(*) as count FROM posts`;
    return Effect.map(tally, (rows) => Number(rows[0]?.count ?? 0));
  })
);
|
|
1128
|
+
|
|
1129
|
+
/**
 * Service-level entry point that rebuilds the store's index from the event
 * log via `rebuildWithClient`.
 */
const rebuild = Effect.fn("StoreIndex.rebuild")((store: StoreRef) => {
  const runRebuild = (client: SqlClient.SqlClient) => rebuildWithClient(store, client);
  return withClient(store, "StoreIndex.rebuild failed", runRebuild);
});
|
|
1134
|
+
|
|
1135
|
+
return StoreIndex.of({
|
|
1136
|
+
apply,
|
|
1137
|
+
getByDate,
|
|
1138
|
+
getByHashtag,
|
|
1139
|
+
getPost,
|
|
1140
|
+
hasUri,
|
|
1141
|
+
clear,
|
|
1142
|
+
loadCheckpoint,
|
|
1143
|
+
saveCheckpoint,
|
|
1144
|
+
query,
|
|
1145
|
+
searchPosts,
|
|
1146
|
+
entries,
|
|
1147
|
+
count,
|
|
1148
|
+
rebuild
|
|
1149
|
+
});
|
|
1150
|
+
})
|
|
1151
|
+
);
|
|
1152
|
+
}
|