jupyter-ydoc 3.3.2__py3-none-any.whl → 3.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jupyter_ydoc/_version.py +1 -1
- jupyter_ydoc/yunicode.py +77 -16
- {jupyter_ydoc-3.3.2.dist-info → jupyter_ydoc-3.3.3.dist-info}/METADATA +1 -1
- {jupyter_ydoc-3.3.2.dist-info → jupyter_ydoc-3.3.3.dist-info}/RECORD +7 -7
- {jupyter_ydoc-3.3.2.dist-info → jupyter_ydoc-3.3.3.dist-info}/WHEEL +0 -0
- {jupyter_ydoc-3.3.2.dist-info → jupyter_ydoc-3.3.3.dist-info}/entry_points.txt +0 -0
- {jupyter_ydoc-3.3.2.dist-info → jupyter_ydoc-3.3.3.dist-info}/licenses/LICENSE +0 -0
jupyter_ydoc/_version.py
CHANGED
jupyter_ydoc/yunicode.py
CHANGED
|
@@ -74,30 +74,66 @@ class YUnicode(YBaseDoc):
|
|
|
74
74
|
# to avoid side-effects such as cursor jumping to the top
|
|
75
75
|
return
|
|
76
76
|
|
|
77
|
+
before_bytes = old_value.encode("utf-8")
|
|
78
|
+
after_bytes = value.encode("utf-8")
|
|
79
|
+
|
|
77
80
|
with self._ydoc.transaction():
|
|
78
|
-
matcher = SequenceMatcher(a=
|
|
81
|
+
matcher = SequenceMatcher(a=before_bytes, b=after_bytes)
|
|
79
82
|
|
|
80
83
|
if (
|
|
81
84
|
matcher.real_quick_ratio() >= SIMILARITY_THREESHOLD
|
|
82
85
|
and matcher.ratio() >= SIMILARITY_THREESHOLD
|
|
83
86
|
):
|
|
84
87
|
operations = matcher.get_opcodes()
|
|
85
|
-
|
|
88
|
+
|
|
89
|
+
# Fix byte ranges and check for problematic overlaps
|
|
90
|
+
fixed_operations = []
|
|
91
|
+
prev_end = 0
|
|
92
|
+
prev_tag = None
|
|
93
|
+
has_overlap = False
|
|
94
|
+
|
|
86
95
|
for tag, i1, i2, j1, j2 in operations:
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
96
|
+
# Fix byte ranges to proper UTF-8 character boundaries
|
|
97
|
+
i1_fixed, i2_fixed = _fix_byte_range_to_char_boundary(before_bytes, i1, i2)
|
|
98
|
+
j1_fixed, j2_fixed = _fix_byte_range_to_char_boundary(after_bytes, j1, j2)
|
|
99
|
+
|
|
100
|
+
# Check if this operation overlaps with the previous one
|
|
101
|
+
# which can happen with grapheme clusters (emoji + modifiers, etc.)
|
|
102
|
+
if i1_fixed < prev_end and prev_tag != "equal":
|
|
103
|
+
has_overlap = True
|
|
104
|
+
break
|
|
105
|
+
|
|
106
|
+
prev_end = i2_fixed
|
|
107
|
+
prev_tag = tag
|
|
108
|
+
fixed_operations.append((tag, i1_fixed, i2_fixed, j1_fixed, j2_fixed))
|
|
109
|
+
|
|
110
|
+
# If we detected overlapping operations, fall back to hard reload
|
|
111
|
+
if has_overlap:
|
|
112
|
+
self._ysource.clear()
|
|
113
|
+
if value:
|
|
114
|
+
self._ysource += value
|
|
115
|
+
else:
|
|
116
|
+
# Apply granular operations
|
|
117
|
+
offset = 0
|
|
118
|
+
for tag, i1, i2, j1, j2 in fixed_operations:
|
|
119
|
+
match tag:
|
|
120
|
+
case "replace":
|
|
121
|
+
self._ysource[i1 + offset : i2 + offset] = after_bytes[
|
|
122
|
+
j1:j2
|
|
123
|
+
].decode("utf-8")
|
|
124
|
+
offset += (j2 - j1) - (i2 - i1)
|
|
125
|
+
case "delete":
|
|
126
|
+
del self._ysource[i1 + offset : i2 + offset]
|
|
127
|
+
offset -= i2 - i1
|
|
128
|
+
case "insert":
|
|
129
|
+
self._ysource.insert(
|
|
130
|
+
i1 + offset, after_bytes[j1:j2].decode("utf-8")
|
|
131
|
+
)
|
|
132
|
+
offset += j2 - j1
|
|
133
|
+
case "equal":
|
|
134
|
+
pass
|
|
135
|
+
case _:
|
|
136
|
+
raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
|
|
101
137
|
else:
|
|
102
138
|
# for very different strings, just replace the whole content;
|
|
103
139
|
# this avoids generating a huge number of operations
|
|
@@ -118,3 +154,28 @@ class YUnicode(YBaseDoc):
|
|
|
118
154
|
self.unobserve()
|
|
119
155
|
self._subscriptions[self._ystate] = self._ystate.observe(partial(callback, "state"))
|
|
120
156
|
self._subscriptions[self._ysource] = self._ysource.observe(partial(callback, "source"))
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _is_utf8_continuation_byte(byte: int) -> bool:
|
|
160
|
+
"""Check if a byte is a UTF-8 continuation byte (10xxxxxx)."""
|
|
161
|
+
return (byte & 0xC0) == 0x80
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _fix_byte_range_to_char_boundary(data: bytes, start: int, end: int) -> tuple[int, int]:
    """
    Widen a byte range so that neither edge sits on a UTF-8 continuation byte.

    The lower edge only ever moves backward and the upper edge only ever moves
    forward, so the returned range always contains the requested one.

    :param data: The byte data (presumably valid UTF-8 - verify against caller).
    :param start: The start byte index.
    :param end: The end byte index.
    :return: A tuple of (adjusted_start, adjusted_end).
    """
    size = len(data)
    lo, hi = start, end

    # Step the lower edge back while it points at a continuation byte.
    # Index 0 and indices at or past the end of the data are left untouched.
    while 0 < lo < size and _is_utf8_continuation_byte(data[lo]):
        lo -= 1

    # Step the upper edge forward past any continuation bytes, stopping at
    # the end of the data at the latest.
    while hi < size and _is_utf8_continuation_byte(data[hi]):
        hi += 1

    return lo, hi
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
jupyter_ydoc/__init__.py,sha256=itUidK7o0_wS6YcbKKIyt1su7hM3-YppQshFheTQQdw,428
|
|
2
|
-
jupyter_ydoc/_version.py,sha256=
|
|
2
|
+
jupyter_ydoc/_version.py,sha256=N8PaAgfH_KWhDzwQorENf6K8gehfU0qesXEFAAPh4ZY,171
|
|
3
3
|
jupyter_ydoc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
jupyter_ydoc/utils.py,sha256=yKvcuqhpylMinmjuscuZ_kY8KPEseFbwcg5K9VzYOfs,810
|
|
5
5
|
jupyter_ydoc/ybasedoc.py,sha256=c0jwhULtTNCjOYHbXhDhKaD6OJYn7hpL4hcLZWyGJsU,5115
|
|
6
6
|
jupyter_ydoc/yblob.py,sha256=JZiXQhONqFS8Cqdglx__AVeS18gyRq0yHq-AQKFPVfw,2316
|
|
7
7
|
jupyter_ydoc/yfile.py,sha256=XTMtAXDWgIOLU2KUQxkLJz2cGvSPlOxpvJc4daXCV6I,198
|
|
8
8
|
jupyter_ydoc/ynotebook.py,sha256=Xplr1x57Y6gWAU84u-Krh9_Xkn_-AN-Vte4DJY5VOfw,13452
|
|
9
|
-
jupyter_ydoc/yunicode.py,sha256=
|
|
10
|
-
jupyter_ydoc-3.3.
|
|
11
|
-
jupyter_ydoc-3.3.
|
|
12
|
-
jupyter_ydoc-3.3.
|
|
13
|
-
jupyter_ydoc-3.3.
|
|
14
|
-
jupyter_ydoc-3.3.
|
|
9
|
+
jupyter_ydoc/yunicode.py,sha256=gmWNvkItUlbHAtc0JeVdq2ffAUMi9gr9lE83vDuNotE,6491
|
|
10
|
+
jupyter_ydoc-3.3.3.dist-info/METADATA,sha256=gE7ys9Pdw3JCEmVT0s6QrVrd7fejgGvHnGVeHzp6pk4,2282
|
|
11
|
+
jupyter_ydoc-3.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
12
|
+
jupyter_ydoc-3.3.3.dist-info/entry_points.txt,sha256=lgvRG-rpsjRKf8cy7LpO7fqwwXy0sBVMCwhGOHgn4mc,164
|
|
13
|
+
jupyter_ydoc-3.3.3.dist-info/licenses/LICENSE,sha256=dqphsFbhnlzPK7Vlkc66Zc7g7PS-e1dln07GXIVpFCQ,1567
|
|
14
|
+
jupyter_ydoc-3.3.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|