yrby 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +37 -0
- data/Cargo.toml +3 -0
- data/LICENSE +21 -0
- data/README.md +413 -0
- data/ext/yrby/Cargo.toml +19 -0
- data/ext/yrby/extconf.rb +6 -0
- data/ext/yrby/src/lib.rs +389 -0
- data/ext/yrby/src/protocol.rs +331 -0
- data/ext/yrby/src/read.rs +188 -0
- data/lib/y/decoder/version.rb +7 -0
- data/lib/y/decoder.rb +66 -0
- data/lib/y/version.rb +5 -0
- data/lib/y.rb +19 -0
- data/lib/yrby-decoder.rb +4 -0
- data/lib/yrby.rb +4 -0
- metadata +120 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
// Pure Rust protocol helpers (no Ruby interop).
|
|
2
|
+
//
|
|
3
|
+
// Client ids are not validated here, on purpose: every legitimate peer (browser
|
|
4
|
+
// Yjs, and yrs's own `ClientID::random`) already emits 53-bit ids, so it's the
|
|
5
|
+
// client's responsibility not to send a bad one, and we don't want to own that
|
|
6
|
+
// logic.
|
|
7
|
+
use yrs::encoding::read::{Cursor, Read};
|
|
8
|
+
use yrs::sync::protocol::MessageReader;
|
|
9
|
+
use yrs::sync::{Message, SyncMessage};
|
|
10
|
+
use yrs::updates::decoder::{Decode, DecoderV1};
|
|
11
|
+
use yrs::{Doc, ReadTxn, Transact};
|
|
12
|
+
|
|
13
|
+
/// Classify a frame: a non-zero code only for exactly one well-formed message
|
|
14
|
+
/// that consumes the whole buffer (the codes are the match arms below).
|
|
15
|
+
pub(crate) fn classify_message(bytes: &[u8]) -> u8 {
|
|
16
|
+
let mut decoder = DecoderV1::new(Cursor::new(bytes));
|
|
17
|
+
let msg = match Message::decode(&mut decoder) {
|
|
18
|
+
Ok(msg) => msg,
|
|
19
|
+
Err(_) => return 0, // empty or malformed
|
|
20
|
+
};
|
|
21
|
+
// Any remaining byte means a second message or trailing garbage.
|
|
22
|
+
if decoder.read_u8().is_ok() {
|
|
23
|
+
return 0;
|
|
24
|
+
}
|
|
25
|
+
match msg {
|
|
26
|
+
Message::Sync(SyncMessage::SyncStep1(_)) => 1,
|
|
27
|
+
Message::Sync(SyncMessage::SyncStep2(_)) | Message::Sync(SyncMessage::Update(_)) => 2,
|
|
28
|
+
Message::Awareness(_) => 3,
|
|
29
|
+
Message::AwarenessQuery => 4,
|
|
30
|
+
_ => 0, // Auth / Custom: not part of our model
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/// Merge the document-update deltas (Update / SyncStep2 payloads) carried by a
|
|
35
|
+
/// frame into one update, or `None` if the frame carries no document change
|
|
36
|
+
/// (a request, an awareness update, or a no-op handshake SyncStep2).
|
|
37
|
+
pub(crate) fn merged_doc_update(bytes: &[u8]) -> Result<Option<Vec<u8>>, String> {
|
|
38
|
+
let mut decoder = DecoderV1::new(Cursor::new(bytes));
|
|
39
|
+
let mut updates: Vec<Vec<u8>> = Vec::new();
|
|
40
|
+
for msg in MessageReader::new(&mut decoder) {
|
|
41
|
+
match msg.map_err(|e| e.to_string())? {
|
|
42
|
+
Message::Sync(SyncMessage::Update(u)) | Message::Sync(SyncMessage::SyncStep2(u)) => {
|
|
43
|
+
updates.push(u)
|
|
44
|
+
}
|
|
45
|
+
_ => {}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
let merged = match updates.len() {
|
|
49
|
+
0 => return Ok(None),
|
|
50
|
+
1 => updates.pop().unwrap(),
|
|
51
|
+
_ => yrs::merge_updates_v1(&updates).map_err(|e| e.to_string())?,
|
|
52
|
+
};
|
|
53
|
+
let update = yrs::Update::decode_v1(&merged).map_err(|e| e.to_string())?;
|
|
54
|
+
// A genuine no-op (e.g. the empty SyncStep2 in an opening handshake) carries
|
|
55
|
+
// no structs, no deletes, and no dependencies. We must NOT treat a causally-
|
|
56
|
+
// pending update as a no-op: such an update reports an empty
|
|
57
|
+
// state_vector (its structs can't integrate yet), but it still carries
|
|
58
|
+
// content and a non-empty lower bound (the deps it's waiting on). Dropping it
|
|
59
|
+
// here would silently swallow a gappy update instead of rejecting + resyncing.
|
|
60
|
+
if update.state_vector().is_empty()
|
|
61
|
+
&& update.delete_set().is_empty()
|
|
62
|
+
&& update.state_vector_lower().is_empty()
|
|
63
|
+
{
|
|
64
|
+
return Ok(None);
|
|
65
|
+
}
|
|
66
|
+
Ok(Some(merged))
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/// True if applying `update_bytes` to `doc` would integrate cleanly: every
|
|
70
|
+
/// dependency the update references is already present (the doc's state vector
|
|
71
|
+
/// covers the update's lower bound). A pure read; does not mutate the doc.
|
|
72
|
+
/// When false, applying it would park a pending struct, the signal that an
|
|
73
|
+
/// earlier, causally-prior update is missing.
|
|
74
|
+
pub(crate) fn update_is_ready(doc: &Doc, update_bytes: &[u8]) -> Result<bool, String> {
|
|
75
|
+
let update = yrs::Update::decode_v1(update_bytes).map_err(|e| e.to_string())?;
|
|
76
|
+
Ok(doc.transact().state_vector() >= update.state_vector_lower())
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/// True if applying `update_bytes` would actually change `doc`, i.e. it carries
|
|
80
|
+
/// content the doc doesn't already have. This lets the server make durable side
|
|
81
|
+
/// effects exactly-once: a lost-ack retry re-sends an update the server already
|
|
82
|
+
/// applied; that retry is causally ready (so `update_is_ready` is true) but must
|
|
83
|
+
/// not re-run `on_change`.
|
|
84
|
+
///
|
|
85
|
+
/// We can't read the update's own state vector to decide this: yrs reports an
|
|
86
|
+
/// empty state_vector() for a causally-pending diff (e.g. a resync delta whose
|
|
87
|
+
/// structs depend on updates the doc has but the standalone update doesn't),
|
|
88
|
+
/// which would look identical to a no-op. So measure the real effect: seed an
|
|
89
|
+
/// independent probe with the doc's current state, apply the update there, and
|
|
90
|
+
/// see whether the state vector grew. Deletes don't move the state vector, so we
|
|
91
|
+
/// can't cheaply prove a delete-bearing update is a duplicate; we conservatively
|
|
92
|
+
/// report it as advancing (record it). That can still double-record a pure-delete
|
|
93
|
+
/// retry, but it never drops a real deletion, which is the safe direction.
|
|
94
|
+
/// Assumes the update is already causally ready.
|
|
95
|
+
pub(crate) fn update_advances_doc(doc: &Doc, update_bytes: &[u8]) -> Result<bool, String> {
|
|
96
|
+
let update = yrs::Update::decode_v1(update_bytes).map_err(|e| e.to_string())?;
|
|
97
|
+
if !update.delete_set().is_empty() {
|
|
98
|
+
return Ok(true); // can't cheaply prove a delete is a duplicate; record it
|
|
99
|
+
}
|
|
100
|
+
let probe = Doc::new();
|
|
101
|
+
let current = doc
|
|
102
|
+
.transact()
|
|
103
|
+
.encode_state_as_update_v1(&yrs::StateVector::default());
|
|
104
|
+
probe
|
|
105
|
+
.transact_mut()
|
|
106
|
+
.apply_update(yrs::Update::decode_v1(¤t).map_err(|e| e.to_string())?)
|
|
107
|
+
.map_err(|e| e.to_string())?;
|
|
108
|
+
let before = probe.transact().state_vector();
|
|
109
|
+
probe
|
|
110
|
+
.transact_mut()
|
|
111
|
+
.apply_update(update)
|
|
112
|
+
.map_err(|e| e.to_string())?;
|
|
113
|
+
let after = probe.transact().state_vector();
|
|
114
|
+
Ok(after != before)
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
/// Report whether `doc` is holding parked work: pending structs or a pending
/// delete set, i.e. blocks that couldn't integrate because a dependency is
/// missing. Test-only: the unit tests use it to assert the causal-chain parking
/// behavior.
#[cfg(test)]
pub(crate) fn doc_has_pending(doc: &Doc) -> bool {
    let txn = doc.transact();
    let store = txn.store();
    store.pending_update().is_some() || store.pending_ds().is_some()
}
|
|
125
|
+
|
|
126
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use yrs::sync::Awareness;
    use yrs::updates::encoder::Encode;
    use yrs::Text;

    /// Encode everything `doc` holds as one v1 update.
    fn full_state(doc: &Doc) -> Vec<u8> {
        doc.transact()
            .encode_state_as_update_v1(&yrs::StateVector::default())
    }

    /// Decode `update` and apply it to `doc`, panicking on any failure.
    fn apply(doc: &Doc, update: &[u8]) {
        let decoded = yrs::Update::decode_v1(update).unwrap();
        doc.transact_mut().apply_update(decoded).unwrap();
    }

    /// A full-state update from a fresh doc whose "content" text is `content`.
    fn text_update(content: &str) -> Vec<u8> {
        let doc = Doc::new();
        let text = doc.get_or_insert_text("content");
        text.insert(&mut doc.transact_mut(), 0, content);
        full_state(&doc)
    }

    /// `text_update(content)` wrapped in a sync `Update` frame.
    fn update_frame(content: &str) -> Vec<u8> {
        let payload = text_update(content);
        Message::Sync(SyncMessage::Update(payload)).encode_v1()
    }

    /// A `SyncStep1` frame carrying an empty state vector.
    fn step1_frame() -> Vec<u8> {
        let request = SyncMessage::SyncStep1(yrs::StateVector::default());
        Message::Sync(request).encode_v1()
    }

    /// An awareness frame announcing a local state for `client_id`.
    fn awareness_frame(client_id: u64) -> Vec<u8> {
        let mut awareness = Awareness::new(Doc::with_client_id(client_id));
        awareness
            .set_local_state(serde_json::json!({ "user": "alice" }))
            .unwrap();
        let update = awareness.update().unwrap();
        Message::Awareness(update).encode_v1()
    }

    #[test]
    fn classify_accepts_clean_single_messages() {
        let cases = [
            (step1_frame(), 1),
            (update_frame("hi"), 2),
            (awareness_frame(7), 3),
            (Message::AwarenessQuery.encode_v1(), 4),
        ];
        for (frame, expected) in cases {
            assert_eq!(classify_message(&frame), expected);
        }
    }

    #[test]
    fn classify_rejects_unsafe_frames() {
        assert_eq!(classify_message(b""), 0, "empty");
        assert_eq!(classify_message(&[0xff, 0xff, 0xff]), 0, "garbage");
        assert_eq!(classify_message(&[0x63, 0x63, 0x63]), 0, "unknown type");

        // Two messages packed together.
        let mut packed = update_frame("a");
        packed.extend(awareness_frame(1));
        assert_eq!(classify_message(&packed), 0, "multi-message");

        let mut with_tail = update_frame("a");
        with_tail.extend_from_slice(&[0xde, 0xad]);
        assert_eq!(classify_message(&with_tail), 0, "trailing garbage");

        let whole = update_frame("hello");
        let half = &whole[..whole.len() / 2];
        assert_eq!(classify_message(half), 0, "truncated");
    }

    #[test]
    fn update_advances_is_false_for_an_already_applied_retry() {
        let doc = Doc::new();
        let upd = text_update("hello");

        // Against a doc that doesn't have it yet, the update advances.
        let advances = update_advances_doc(&doc, &upd).unwrap();
        assert!(advances, "new content advances");

        // Apply it, then the byte-identical retry no longer advances.
        apply(&doc, &upd);
        let advances = update_advances_doc(&doc, &upd).unwrap();
        assert!(!advances, "an already-applied retry does not advance");

        // A genuinely new insert (from a different client) still advances.
        let more = text_update("world");
        let advances = update_advances_doc(&doc, &more).unwrap();
        assert!(advances, "different new content advances");
    }

    #[test]
    fn update_advances_handles_a_dependent_diff_update() {
        // A causally-pending diff (its structs depend on content the doc already
        // has) reports an empty state_vector() in isolation, which a naive check
        // would misread as a no-op. Verify the trial-apply gets it right.
        let client = Doc::new();
        let text = client.get_or_insert_text("content");
        text.insert(&mut client.transact_mut(), 0, "a");
        let a_update = full_state(&client);
        let sv_a = client.transact().state_vector();
        text.insert(&mut client.transact_mut(), 1, "b");
        // Everything after "a": this delta depends on "a".
        let diff = client.transact().encode_state_as_update_v1(&sv_a);

        // A server that has only "a".
        let server = Doc::new();
        apply(&server, &a_update);

        let advances = update_advances_doc(&server, &diff).unwrap();
        assert!(advances, "a dependent diff carrying new content advances");

        apply(&server, &diff);
        let advances = update_advances_doc(&server, &diff).unwrap();
        assert!(
            !advances,
            "the byte-identical retry of that diff does not advance"
        );
    }

    #[test]
    fn merged_doc_update_extracts_and_skips_no_ops() {
        // A document update yields a delta that reconstructs the content.
        let delta = merged_doc_update(&update_frame("hello"))
            .unwrap()
            .expect("a document update");
        let doc = Doc::new();
        apply(&doc, &delta);
        // The delta carried real content, so applying it advances the doc.
        assert!(!doc.transact().state_vector().is_empty());

        // A SyncStep1 request carries no document change.
        let from_request = merged_doc_update(&step1_frame()).unwrap();
        assert!(from_request.is_none());

        // An empty SyncStep2 (no new structs) is a no-op.
        let empty_state = full_state(&Doc::new());
        let empty = Message::Sync(SyncMessage::SyncStep2(empty_state)).encode_v1();
        let from_empty = merged_doc_update(&empty).unwrap();
        assert!(from_empty.is_none());
    }

    #[test]
    fn merged_doc_update_merges_multiple_updates() {
        // Two updates from different clients packed in one frame merge into one.
        let mut frame = update_frame("a");
        frame.extend(update_frame("b"));
        let merged = merged_doc_update(&frame).unwrap().expect("merged update");

        // The merged update must decode cleanly as a single update.
        assert!(yrs::Update::decode_v1(&merged).is_ok());
    }

    #[test]
    fn update_readiness_and_pending_detect_a_causal_gap() {
        // Three sequential single-char inserts from one client: A, then B, then
        // C. Each delta depends on the previous, so C can't integrate without B.
        let src = Doc::new();
        let txt = src.get_or_insert_text("t");
        let mut deltas: Vec<Vec<u8>> = Vec::new();
        let mut seen = yrs::StateVector::default();
        for (index, ch) in ["A", "B", "C"].into_iter().enumerate() {
            txt.insert(&mut src.transact_mut(), index as u32, ch);
            let delta = src.transact().encode_state_as_update_v1(&seen);
            deltas.push(delta);
            seen = src.transact().state_vector();
        }
        let u1 = &deltas[0];
        let u2 = &deltas[1];
        let u3 = &deltas[2];

        // A doc holding only u1 (u2 was lost in transit / its record failed):
        let doc = Doc::new();
        apply(&doc, u1);
        assert!(update_is_ready(&doc, u1).unwrap(), "u1 has no missing deps");
        assert!(
            !update_is_ready(&doc, u3).unwrap(),
            "u3 depends on the missing u2"
        );
        assert!(
            !doc_has_pending(&doc),
            "nothing pending until u3 is applied"
        );

        // Applying u3 anyway parks it as a pending struct.
        apply(&doc, u3);
        assert!(
            doc_has_pending(&doc),
            "u3 is pending: its parent u2 is missing"
        );

        // Once u2 arrives (via resync), u3 integrates and pending clears.
        apply(&doc, u2);
        assert!(!doc_has_pending(&doc), "u2 arrived; u3 integrated");
    }
}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
//! Pure content-reading helpers over yrs shared types — no magnus/Ruby, so they
|
|
2
|
+
//! can be unit-tested directly in Rust (like `protocol.rs`). The binding layer in
|
|
3
|
+
//! `lib.rs` is a thin wrapper that opens a transaction and calls these.
|
|
4
|
+
|
|
5
|
+
use std::collections::HashMap;
|
|
6
|
+
use std::sync::Arc;
|
|
7
|
+
use yrs::{Any, Array, GetString, Map, MapRef, Out, ReadTxn, XmlFragment, XmlFragmentRef, XmlOut};
|
|
8
|
+
|
|
9
|
+
/// Read an XML-shaped root as text, one top-level block per line.
|
|
10
|
+
///
|
|
11
|
+
/// ProseMirror stores blocks as `Y.XmlElement` children (`<paragraph>…`);
|
|
12
|
+
/// Lexical stores each block as a sibling `Y.XmlText` (its node metadata is an
|
|
13
|
+
/// embed, which yrs omits from the string). We serialize each top-level child and
|
|
14
|
+
/// join with "\n", so adjacent blocks don't merge into one run of words. Without
|
|
15
|
+
/// the separator, Lexical — whose blocks carry no element tags — would glue
|
|
16
|
+
/// paragraphs together (e.g. "first paragraphsecond paragraph"), breaking word
|
|
17
|
+
/// boundaries for search/preview. Element tags are kept (the caller strips them);
|
|
18
|
+
/// deeper nesting is flattened, but its inner tags still separate words after
|
|
19
|
+
/// stripping.
|
|
20
|
+
pub fn xml_blocks_text<T: ReadTxn>(txn: &T, fragment: &XmlFragmentRef) -> String {
|
|
21
|
+
fragment
|
|
22
|
+
.children(txn)
|
|
23
|
+
.map(|node| match node {
|
|
24
|
+
XmlOut::Element(e) => e.get_string(txn),
|
|
25
|
+
XmlOut::Text(t) => t.get_string(txn),
|
|
26
|
+
XmlOut::Fragment(f) => f.get_string(txn),
|
|
27
|
+
})
|
|
28
|
+
.collect::<Vec<_>>()
|
|
29
|
+
.join("\n")
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/// Read a `Y.Map` root as a JSON object string (keys sorted for stable output).
|
|
33
|
+
///
|
|
34
|
+
/// The complement to `read_text`/`read_xml` for structured state — e.g. a shared
|
|
35
|
+
/// "view state" map. Values are converted recursively: primitives pass through;
|
|
36
|
+
/// nested `Y.Map`/`Y.Array` recurse; `Y.Text`/XML values stringify. The caller
|
|
37
|
+
/// parses the JSON (yrs's own `Out::to_json` is crate-private, so we walk the
|
|
38
|
+
/// `Out` variants ourselves here).
|
|
39
|
+
pub fn map_json<T: ReadTxn>(txn: &T, map: &MapRef) -> String {
|
|
40
|
+
let mut pairs: Vec<(String, Any)> = map
|
|
41
|
+
.iter(txn)
|
|
42
|
+
.map(|(k, v)| (k.to_string(), out_to_any(txn, &v)))
|
|
43
|
+
.collect();
|
|
44
|
+
pairs.sort_by(|a, b| a.0.cmp(&b.0)); // deterministic key order
|
|
45
|
+
let mut out = String::from("{");
|
|
46
|
+
for (i, (k, v)) in pairs.iter().enumerate() {
|
|
47
|
+
if i > 0 {
|
|
48
|
+
out.push(',');
|
|
49
|
+
}
|
|
50
|
+
// Any::to_json serializes from the start of the buffer (it doesn't
|
|
51
|
+
// append), so each piece goes into its own String, then concatenated.
|
|
52
|
+
out.push_str(&any_to_json(&Any::String(Arc::from(k.as_str())))); // JSON-escaped key
|
|
53
|
+
out.push(':');
|
|
54
|
+
out.push_str(&any_to_json(v));
|
|
55
|
+
}
|
|
56
|
+
out.push('}');
|
|
57
|
+
out
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
fn any_to_json(a: &Any) -> String {
|
|
61
|
+
let mut s = String::new();
|
|
62
|
+
a.to_json(&mut s);
|
|
63
|
+
s
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/// Convert a yrs output value to an `Any` (which knows how to JSON-serialize),
|
|
67
|
+
/// recursing through nested shared collections.
|
|
68
|
+
fn out_to_any<T: ReadTxn>(txn: &T, out: &Out) -> Any {
|
|
69
|
+
match out {
|
|
70
|
+
Out::Any(a) => a.clone(),
|
|
71
|
+
Out::YText(v) => Any::from(v.get_string(txn)),
|
|
72
|
+
Out::YXmlText(v) => Any::from(v.get_string(txn)),
|
|
73
|
+
Out::YXmlElement(v) => Any::from(v.get_string(txn)),
|
|
74
|
+
Out::YXmlFragment(v) => Any::from(v.get_string(txn)),
|
|
75
|
+
Out::YArray(arr) => {
|
|
76
|
+
let items: Vec<Any> = arr.iter(txn).map(|o| out_to_any(txn, &o)).collect();
|
|
77
|
+
Any::Array(items.into())
|
|
78
|
+
}
|
|
79
|
+
Out::YMap(m) => {
|
|
80
|
+
let mut hm: HashMap<String, Any> = HashMap::new();
|
|
81
|
+
for (k, v) in m.iter(txn) {
|
|
82
|
+
hm.insert(k.to_string(), out_to_any(txn, &v));
|
|
83
|
+
}
|
|
84
|
+
Any::Map(Arc::new(hm))
|
|
85
|
+
}
|
|
86
|
+
_ => Any::Null,
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use yrs::{Doc, MapPrelim, Transact, XmlElementPrelim, XmlTextPrelim};

    #[test]
    fn prosemirror_blocks_keep_tags_and_separate_with_newlines() {
        let doc = Doc::new();
        let frag = doc.get_or_insert_xml_fragment("pm");

        // Two element blocks, each wrapping one text node.
        let mut write = doc.transact_mut();
        let heading = frag.push_back(&mut write, XmlElementPrelim::empty("heading"));
        heading.push_back(&mut write, XmlTextPrelim::new("Title"));
        let paragraph = frag.push_back(&mut write, XmlElementPrelim::empty("paragraph"));
        paragraph.push_back(&mut write, XmlTextPrelim::new("Body"));
        drop(write); // finish the write before opening a read transaction

        let read = doc.transact();
        let rendered = xml_blocks_text(&read, &frag);
        assert_eq!(
            rendered,
            "<heading>Title</heading>\n<paragraph>Body</paragraph>"
        );
    }

    #[test]
    fn lexical_style_sibling_text_blocks_separate_with_newlines() {
        // Lexical stores each block as a sibling XmlText with no element tags;
        // this is the case a flat read glued together.
        let doc = Doc::new();
        let frag = doc.get_or_insert_xml_fragment("lex");

        let mut write = doc.transact_mut();
        frag.push_back(&mut write, XmlTextPrelim::new("first paragraph"));
        frag.push_back(&mut write, XmlTextPrelim::new("second paragraph"));
        drop(write);

        let read = doc.transact();
        let rendered = xml_blocks_text(&read, &frag);
        assert_eq!(rendered, "first paragraph\nsecond paragraph");
    }

    #[test]
    fn single_block_has_no_trailing_separator() {
        let doc = Doc::new();
        let frag = doc.get_or_insert_xml_fragment("one");

        let mut write = doc.transact_mut();
        frag.push_back(&mut write, XmlTextPrelim::new("only"));
        drop(write);

        let read = doc.transact();
        assert_eq!(xml_blocks_text(&read, &frag), "only");
    }

    #[test]
    fn empty_fragment_is_blank() {
        let doc = Doc::new();
        let frag = doc.get_or_insert_xml_fragment("empty");
        let read = doc.transact();
        assert_eq!(xml_blocks_text(&read, &frag), "");
    }

    #[test]
    fn map_json_serializes_primitives_with_sorted_keys() {
        let doc = Doc::new();
        let map = doc.get_or_insert_map("state");

        // Inserted out of alphabetical order on purpose.
        let mut write = doc.transact_mut();
        map.insert(&mut write, "title", "Dashboard");
        map.insert(&mut write, "count", 3_i64);
        map.insert(&mut write, "active", true);
        drop(write);

        let read = doc.transact();
        let json = map_json(&read, &map);
        assert_eq!(json, r#"{"active":true,"count":3,"title":"Dashboard"}"#);
    }

    #[test]
    fn map_json_recurses_into_nested_map() {
        let doc = Doc::new();
        let map = doc.get_or_insert_map("state");

        let mut write = doc.transact_mut();
        let inner = map.insert(&mut write, "user", MapPrelim::default());
        inner.insert(&mut write, "name", "Ada");
        drop(write);

        let read = doc.transact();
        let json = map_json(&read, &map);
        assert_eq!(json, r#"{"user":{"name":"Ada"}}"#);
    }

    #[test]
    fn map_json_empty_is_object() {
        let doc = Doc::new();
        let map = doc.get_or_insert_map("state");
        let read = doc.transact();
        assert_eq!(map_json(&read, &map), "{}");
    }
}
|
data/lib/y/decoder.rb
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "y"
|
|
4
|
+
require "y/decoder/version"
|
|
5
|
+
|
|
6
|
+
module Y
  # Plain-text reconstruction of a stored Yjs document, in pure Ruby — for search
  # indexing and previews. The core `yrby` gem moves and stores opaque CRDT
  # updates without reading them; this reads the text out of the shared type the
  # editor uses (Lexical's `Y.XmlText`, plain `Y.Text`, or ProseMirror's
  # `Y.XmlFragment`), in-process, using the native extension that the core gem
  # already ships — no Node, no subprocess, no binary.
  #
  #   state = doc.encode_state_as_update      # opaque CRDT bytes from the store
  #   Y::Decoder.text(state)                   # => "hello world"
  #   Y::Decoder.preview(state, limit: 280)    # => "hello world"
  #
  # `limit` is a keyword argument; "…" is appended only when the text is longer
  # than `limit` characters.
  #
  # Full-fidelity reconstruction (the exact Lexical EditorState / HTML, which
  # needs @lexical/yjs) is a separate, opt-in concern — see the `yrby-decode`
  # package's Bun binary. This gem stays pure Ruby on purpose.
  module Decoder
    class Error < Y::Error; end

    module_function

    # Plain text of the document. `field` pins the root key (Lexical: the editor
    # id; ProseMirror: "default"); omit it to use the first root name the
    # document reports. Returns "" when the document has no root, or when the
    # root yields neither text nor markup.
    def text(state, field: nil)
      field ||= load(state).root_names.first
      return "" unless field

      # A plain `Y.Text` root (a simple shared-text editor) reads straight out.
      # (A yrs root's type is fixed by its first typed access, so each reader
      # gets a fresh doc to try a different shared type against the same state.)
      direct = load(state).read_text(field)
      return normalize(direct) if direct && !direct.strip.empty?

      # Lexical (each block a sibling `Y.XmlText`) and ProseMirror (blocks are
      # `Y.XmlElement`s) both come back from read_xml as block-per-line markup;
      # strip any element tags to plain text.
      markup = load(state).read_xml(field)
      markup ? normalize(strip_tags(markup)) : ""
    end

    # A compact, single-line preview for list UIs: all whitespace runs
    # (including block separators) collapse to one space, and text longer than
    # `limit` characters is cut to `limit`, right-trimmed, and suffixed with "…".
    def preview(state, limit: 280, field: nil)
      body = text(state, field: field).gsub(/\s+/, " ").strip
      body.length > limit ? "#{body[0, limit].rstrip}…" : body
    end

    # A fresh `Y::Doc` with `state` applied. Note: inside this module the name
    # takes precedence over `Kernel#load`.
    def load(state)
      Y::Doc.new.tap { |doc| doc.apply_update(state) }
    end

    # Replace every `<...>` tag with a single space so words on either side of
    # a tag stay separated.
    def strip_tags(markup)
      markup.gsub(/<[^>]*>/, " ")
    end

    # Tidy whitespace while preserving block separators (newlines).
    def normalize(text)
      text.gsub(/[ \t]+/, " ")   # collapse runs of spaces/tabs
          .gsub(/ *\n */, "\n")  # trim spaces left around block separators
          .gsub(/\n{3,}/, "\n\n") # cap blank-line runs
          .strip
    end
  end
end
|
data/lib/y/version.rb
ADDED
data/lib/y.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "y/version"
|
|
4
|
+
|
|
5
|
+
# Load the native extension. Precompiled gems place it in a per-Ruby-version
# subdirectory (lib/y/<major.minor>/yrby.<ext>), while a source build leaves it
# flat at lib/y/yrby.<ext>. The versioned path is attempted first; if that
# raises LoadError, the flat path is used instead.
begin
  require_relative "y/#{RUBY_VERSION[/\d+\.\d+/]}/yrby"
rescue LoadError
  require_relative "y/yrby"
end

module Y
  # Doc, Error, and the protocol module functions come from the Rust extension
  # loaded above. The ActionCable integration (Y::ActionCable::Sync) ships in
  # the separate `yrby-actioncable` gem; require "y/action_cable" to use it.
end
|
data/lib/yrby-decoder.rb
ADDED
data/lib/yrby.rb
ADDED