jupyter-ydoc 3.3.1__py3-none-any.whl → 3.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jupyter_ydoc/_version.py +1 -1
- jupyter_ydoc/yunicode.py +100 -7
- {jupyter_ydoc-3.3.1.dist-info → jupyter_ydoc-3.3.3.dist-info}/METADATA +1 -1
- {jupyter_ydoc-3.3.1.dist-info → jupyter_ydoc-3.3.3.dist-info}/RECORD +7 -7
- {jupyter_ydoc-3.3.1.dist-info → jupyter_ydoc-3.3.3.dist-info}/WHEEL +1 -1
- {jupyter_ydoc-3.3.1.dist-info → jupyter_ydoc-3.3.3.dist-info}/entry_points.txt +0 -0
- {jupyter_ydoc-3.3.1.dist-info → jupyter_ydoc-3.3.3.dist-info}/licenses/LICENSE +0 -0
jupyter_ydoc/_version.py
CHANGED
jupyter_ydoc/yunicode.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
# Distributed under the terms of the Modified BSD License.
|
|
3
3
|
|
|
4
4
|
from collections.abc import Callable
|
|
5
|
+
from difflib import SequenceMatcher
|
|
5
6
|
from functools import partial
|
|
6
7
|
from typing import Any
|
|
7
8
|
|
|
@@ -9,6 +10,9 @@ from pycrdt import Awareness, Doc, Text
|
|
|
9
10
|
|
|
10
11
|
from .ybasedoc import YBaseDoc
|
|
11
12
|
|
|
13
|
+
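# Minimum SequenceMatcher similarity ratio (the 0.6 rule of thumb from the difflib documentation) for applying granular edits instead of replacing the whole content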
|
|
14
|
+
SIMILARITY_THREESHOLD = 0.6
|
|
15
|
+
|
|
12
16
|
|
|
13
17
|
class YUnicode(YBaseDoc):
|
|
14
18
|
"""
|
|
@@ -35,7 +39,7 @@ class YUnicode(YBaseDoc):
|
|
|
35
39
|
:type awareness: :class:`pycrdt.Awareness`, optional.
|
|
36
40
|
"""
|
|
37
41
|
super().__init__(ydoc, awareness)
|
|
38
|
-
self._ysource = self._ydoc.get("source", type=Text)
|
|
42
|
+
self._ysource: Text = self._ydoc.get("source", type=Text)
|
|
39
43
|
self.undo_manager.expand_scope(self._ysource)
|
|
40
44
|
|
|
41
45
|
@property
|
|
@@ -64,17 +68,81 @@ class YUnicode(YBaseDoc):
|
|
|
64
68
|
:param value: The content of the document.
|
|
65
69
|
:type value: str
|
|
66
70
|
"""
|
|
67
|
-
|
|
71
|
+
old_value = self.get()
|
|
72
|
+
if old_value == value:
|
|
68
73
|
# no-op if the values are already the same,
|
|
69
74
|
# to avoid side-effects such as cursor jumping to the top
|
|
70
75
|
return
|
|
71
76
|
|
|
77
|
+
before_bytes = old_value.encode("utf-8")
|
|
78
|
+
after_bytes = value.encode("utf-8")
|
|
79
|
+
|
|
72
80
|
with self._ydoc.transaction():
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
81
|
+
matcher = SequenceMatcher(a=before_bytes, b=after_bytes)
|
|
82
|
+
|
|
83
|
+
if (
|
|
84
|
+
matcher.real_quick_ratio() >= SIMILARITY_THREESHOLD
|
|
85
|
+
and matcher.ratio() >= SIMILARITY_THREESHOLD
|
|
86
|
+
):
|
|
87
|
+
operations = matcher.get_opcodes()
|
|
88
|
+
|
|
89
|
+
# Fix byte ranges and check for problematic overlaps
|
|
90
|
+
fixed_operations = []
|
|
91
|
+
prev_end = 0
|
|
92
|
+
prev_tag = None
|
|
93
|
+
has_overlap = False
|
|
94
|
+
|
|
95
|
+
for tag, i1, i2, j1, j2 in operations:
|
|
96
|
+
# Fix byte ranges to proper UTF-8 character boundaries
|
|
97
|
+
i1_fixed, i2_fixed = _fix_byte_range_to_char_boundary(before_bytes, i1, i2)
|
|
98
|
+
j1_fixed, j2_fixed = _fix_byte_range_to_char_boundary(after_bytes, j1, j2)
|
|
99
|
+
|
|
100
|
+
# Check if this operation overlaps with the previous one
|
|
101
|
+
# which can happen with grapheme clusters (emoji + modifiers, etc.)
|
|
102
|
+
if i1_fixed < prev_end and prev_tag != "equal":
|
|
103
|
+
has_overlap = True
|
|
104
|
+
break
|
|
105
|
+
|
|
106
|
+
prev_end = i2_fixed
|
|
107
|
+
prev_tag = tag
|
|
108
|
+
fixed_operations.append((tag, i1_fixed, i2_fixed, j1_fixed, j2_fixed))
|
|
109
|
+
|
|
110
|
+
# If we detected overlapping operations, fall back to hard reload
|
|
111
|
+
if has_overlap:
|
|
112
|
+
self._ysource.clear()
|
|
113
|
+
if value:
|
|
114
|
+
self._ysource += value
|
|
115
|
+
else:
|
|
116
|
+
# Apply granular operations
|
|
117
|
+
offset = 0
|
|
118
|
+
for tag, i1, i2, j1, j2 in fixed_operations:
|
|
119
|
+
match tag:
|
|
120
|
+
case "replace":
|
|
121
|
+
self._ysource[i1 + offset : i2 + offset] = after_bytes[
|
|
122
|
+
j1:j2
|
|
123
|
+
].decode("utf-8")
|
|
124
|
+
offset += (j2 - j1) - (i2 - i1)
|
|
125
|
+
case "delete":
|
|
126
|
+
del self._ysource[i1 + offset : i2 + offset]
|
|
127
|
+
offset -= i2 - i1
|
|
128
|
+
case "insert":
|
|
129
|
+
self._ysource.insert(
|
|
130
|
+
i1 + offset, after_bytes[j1:j2].decode("utf-8")
|
|
131
|
+
)
|
|
132
|
+
offset += j2 - j1
|
|
133
|
+
case "equal":
|
|
134
|
+
pass
|
|
135
|
+
case _:
|
|
136
|
+
raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
|
|
137
|
+
else:
|
|
138
|
+
# for very different strings, just replace the whole content;
|
|
139
|
+
# this avoids generating a huge number of operations
|
|
140
|
+
|
|
141
|
+
# clear document
|
|
142
|
+
self._ysource.clear()
|
|
143
|
+
# initialize document
|
|
144
|
+
if value:
|
|
145
|
+
self._ysource += value
|
|
78
146
|
|
|
79
147
|
def observe(self, callback: Callable[[str, Any], None]) -> None:
|
|
80
148
|
"""
|
|
@@ -86,3 +154,28 @@ class YUnicode(YBaseDoc):
|
|
|
86
154
|
self.unobserve()
|
|
87
155
|
self._subscriptions[self._ystate] = self._ystate.observe(partial(callback, "state"))
|
|
88
156
|
self._subscriptions[self._ysource] = self._ysource.observe(partial(callback, "source"))
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _is_utf8_continuation_byte(byte: int) -> bool:
|
|
160
|
+
"""Check if a byte is a UTF-8 continuation byte (10xxxxxx)."""
|
|
161
|
+
return (byte & 0xC0) == 0x80
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _fix_byte_range_to_char_boundary(data: bytes, start: int, end: int) -> tuple[int, int]:
    """
    Widen a byte range so that neither edge points at a UTF-8 continuation byte.

    The start index is moved backward while it points at a continuation
    byte, and the end index is moved forward while it points at one.
    A start index of ``0``, or one at or beyond ``len(data)``, is returned
    unchanged; the same holds for an end index at or beyond ``len(data)``.

    Note that an empty range (``start == end``) pointing at a continuation
    byte is widened as well, so the returned range is then no longer empty.

    :param data: The byte data.
    :param start: The start byte index.
    :param end: The end byte index.
    :return: A tuple of (adjusted_start, adjusted_end).
    """
    size = len(data)

    # Walk the lower edge back until it no longer points at a continuation byte.
    lower = start
    while 0 < lower < size:
        if not _is_utf8_continuation_byte(data[lower]):
            break
        lower -= 1

    # Walk the upper edge forward past any continuation bytes it points at.
    upper = end
    while upper < size:
        if not _is_utf8_continuation_byte(data[upper]):
            break
        upper += 1

    return lower, upper
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
jupyter_ydoc/__init__.py,sha256=itUidK7o0_wS6YcbKKIyt1su7hM3-YppQshFheTQQdw,428
|
|
2
|
-
jupyter_ydoc/_version.py,sha256=
|
|
2
|
+
jupyter_ydoc/_version.py,sha256=N8PaAgfH_KWhDzwQorENf6K8gehfU0qesXEFAAPh4ZY,171
|
|
3
3
|
jupyter_ydoc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
jupyter_ydoc/utils.py,sha256=yKvcuqhpylMinmjuscuZ_kY8KPEseFbwcg5K9VzYOfs,810
|
|
5
5
|
jupyter_ydoc/ybasedoc.py,sha256=c0jwhULtTNCjOYHbXhDhKaD6OJYn7hpL4hcLZWyGJsU,5115
|
|
6
6
|
jupyter_ydoc/yblob.py,sha256=JZiXQhONqFS8Cqdglx__AVeS18gyRq0yHq-AQKFPVfw,2316
|
|
7
7
|
jupyter_ydoc/yfile.py,sha256=XTMtAXDWgIOLU2KUQxkLJz2cGvSPlOxpvJc4daXCV6I,198
|
|
8
8
|
jupyter_ydoc/ynotebook.py,sha256=Xplr1x57Y6gWAU84u-Krh9_Xkn_-AN-Vte4DJY5VOfw,13452
|
|
9
|
-
jupyter_ydoc/yunicode.py,sha256=
|
|
10
|
-
jupyter_ydoc-3.3.
|
|
11
|
-
jupyter_ydoc-3.3.
|
|
12
|
-
jupyter_ydoc-3.3.
|
|
13
|
-
jupyter_ydoc-3.3.
|
|
14
|
-
jupyter_ydoc-3.3.
|
|
9
|
+
jupyter_ydoc/yunicode.py,sha256=gmWNvkItUlbHAtc0JeVdq2ffAUMi9gr9lE83vDuNotE,6491
|
|
10
|
+
jupyter_ydoc-3.3.3.dist-info/METADATA,sha256=gE7ys9Pdw3JCEmVT0s6QrVrd7fejgGvHnGVeHzp6pk4,2282
|
|
11
|
+
jupyter_ydoc-3.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
12
|
+
jupyter_ydoc-3.3.3.dist-info/entry_points.txt,sha256=lgvRG-rpsjRKf8cy7LpO7fqwwXy0sBVMCwhGOHgn4mc,164
|
|
13
|
+
jupyter_ydoc-3.3.3.dist-info/licenses/LICENSE,sha256=dqphsFbhnlzPK7Vlkc66Zc7g7PS-e1dln07GXIVpFCQ,1567
|
|
14
|
+
jupyter_ydoc-3.3.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|