jupyter-ydoc 3.3.1__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
jupyter_ydoc/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # This file is auto-generated by Hatchling. As such, do not:
2
2
  # - modify
3
3
  # - track in version control e.g. be sure to add to .gitignore
4
- __version__ = VERSION = '3.3.1'
4
+ __version__ = VERSION = '3.3.3'
jupyter_ydoc/yunicode.py CHANGED
@@ -2,6 +2,7 @@
2
2
  # Distributed under the terms of the Modified BSD License.
3
3
 
4
4
  from collections.abc import Callable
5
+ from difflib import SequenceMatcher
5
6
  from functools import partial
6
7
  from typing import Any
7
8
 
@@ -9,6 +10,9 @@ from pycrdt import Awareness, Doc, Text
9
10
 
10
11
  from .ybasedoc import YBaseDoc
11
12
 
13
+ # Heuristic threshold as recommended in difflib documentation
14
+ SIMILARITY_THREESHOLD = 0.6
15
+
12
16
 
13
17
  class YUnicode(YBaseDoc):
14
18
  """
@@ -35,7 +39,7 @@ class YUnicode(YBaseDoc):
35
39
  :type awareness: :class:`pycrdt.Awareness`, optional.
36
40
  """
37
41
  super().__init__(ydoc, awareness)
38
- self._ysource = self._ydoc.get("source", type=Text)
42
+ self._ysource: Text = self._ydoc.get("source", type=Text)
39
43
  self.undo_manager.expand_scope(self._ysource)
40
44
 
41
45
  @property
@@ -64,17 +68,81 @@ class YUnicode(YBaseDoc):
64
68
  :param value: The content of the document.
65
69
  :type value: str
66
70
  """
67
- if self.get() == value:
71
+ old_value = self.get()
72
+ if old_value == value:
68
73
  # no-op if the values are already the same,
69
74
  # to avoid side-effects such as cursor jumping to the top
70
75
  return
71
76
 
77
+ before_bytes = old_value.encode("utf-8")
78
+ after_bytes = value.encode("utf-8")
79
+
72
80
  with self._ydoc.transaction():
73
- # clear document
74
- self._ysource.clear()
75
- # initialize document
76
- if value:
77
- self._ysource += value
81
+ matcher = SequenceMatcher(a=before_bytes, b=after_bytes)
82
+
83
+ if (
84
+ matcher.real_quick_ratio() >= SIMILARITY_THREESHOLD
85
+ and matcher.ratio() >= SIMILARITY_THREESHOLD
86
+ ):
87
+ operations = matcher.get_opcodes()
88
+
89
+ # Fix byte ranges and check for problematic overlaps
90
+ fixed_operations = []
91
+ prev_end = 0
92
+ prev_tag = None
93
+ has_overlap = False
94
+
95
+ for tag, i1, i2, j1, j2 in operations:
96
+ # Fix byte ranges to proper UTF-8 character boundaries
97
+ i1_fixed, i2_fixed = _fix_byte_range_to_char_boundary(before_bytes, i1, i2)
98
+ j1_fixed, j2_fixed = _fix_byte_range_to_char_boundary(after_bytes, j1, j2)
99
+
100
+ # Check if this operation overlaps with the previous one
101
+ # which can happen with grapheme clusters (emoji + modifiers, etc.)
102
+ if i1_fixed < prev_end and prev_tag != "equal":
103
+ has_overlap = True
104
+ break
105
+
106
+ prev_end = i2_fixed
107
+ prev_tag = tag
108
+ fixed_operations.append((tag, i1_fixed, i2_fixed, j1_fixed, j2_fixed))
109
+
110
+ # If we detected overlapping operations, fall back to hard reload
111
+ if has_overlap:
112
+ self._ysource.clear()
113
+ if value:
114
+ self._ysource += value
115
+ else:
116
+ # Apply granular operations
117
+ offset = 0
118
+ for tag, i1, i2, j1, j2 in fixed_operations:
119
+ match tag:
120
+ case "replace":
121
+ self._ysource[i1 + offset : i2 + offset] = after_bytes[
122
+ j1:j2
123
+ ].decode("utf-8")
124
+ offset += (j2 - j1) - (i2 - i1)
125
+ case "delete":
126
+ del self._ysource[i1 + offset : i2 + offset]
127
+ offset -= i2 - i1
128
+ case "insert":
129
+ self._ysource.insert(
130
+ i1 + offset, after_bytes[j1:j2].decode("utf-8")
131
+ )
132
+ offset += j2 - j1
133
+ case "equal":
134
+ pass
135
+ case _:
136
+ raise ValueError(f"Unknown tag '{tag}' in sequence matcher")
137
+ else:
138
+ # for very different strings, just replace the whole content;
139
+ # this avoids generating a huge number of operations
140
+
141
+ # clear document
142
+ self._ysource.clear()
143
+ # initialize document
144
+ if value:
145
+ self._ysource += value
78
146
 
79
147
  def observe(self, callback: Callable[[str, Any], None]) -> None:
80
148
  """
@@ -86,3 +154,28 @@ class YUnicode(YBaseDoc):
86
154
  self.unobserve()
87
155
  self._subscriptions[self._ystate] = self._ystate.observe(partial(callback, "state"))
88
156
  self._subscriptions[self._ysource] = self._ysource.observe(partial(callback, "source"))
157
+
158
+
159
+ def _is_utf8_continuation_byte(byte: int) -> bool:
160
+ """Check if a byte is a UTF-8 continuation byte (10xxxxxx)."""
161
+ return (byte & 0xC0) == 0x80
162
+
163
+
164
+ def _fix_byte_range_to_char_boundary(data: bytes, start: int, end: int) -> tuple[int, int]:
165
+ """
166
+ Adjust byte indices to proper UTF-8 character boundaries.
167
+
168
+ :param data: The byte data.
169
+ :param start: The start byte index.
170
+ :param end: The end byte index.
171
+ :return: A tuple of (adjusted_start, adjusted_end).
172
+ """
173
+ # Move start backward to the beginning of a UTF-8 character
174
+ while start > 0 and start < len(data) and _is_utf8_continuation_byte(data[start]):
175
+ start -= 1
176
+
177
+ # Move end forward to the end of a UTF-8 character
178
+ while end < len(data) and _is_utf8_continuation_byte(data[end]):
179
+ end += 1
180
+
181
+ return start, end
jupyter_ydoc-3.3.1.dist-info/METADATA → jupyter_ydoc-3.3.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: jupyter-ydoc
3
- Version: 3.3.1
3
+ Version: 3.3.3
4
4
  Summary: Document structures for collaborative editing using Ypy
5
5
  Project-URL: Homepage, https://jupyter.org
6
6
  Project-URL: Source, https://github.com/jupyter-server/jupyter_ydoc
jupyter_ydoc-3.3.1.dist-info/RECORD → jupyter_ydoc-3.3.3.dist-info/RECORD CHANGED
@@ -1,14 +1,14 @@
1
1
  jupyter_ydoc/__init__.py,sha256=itUidK7o0_wS6YcbKKIyt1su7hM3-YppQshFheTQQdw,428
2
- jupyter_ydoc/_version.py,sha256=x_sVWMD6ap_A4vQm5y9tdlqTDcRFFLDXlpVUMVmxlIM,171
2
+ jupyter_ydoc/_version.py,sha256=N8PaAgfH_KWhDzwQorENf6K8gehfU0qesXEFAAPh4ZY,171
3
3
  jupyter_ydoc/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  jupyter_ydoc/utils.py,sha256=yKvcuqhpylMinmjuscuZ_kY8KPEseFbwcg5K9VzYOfs,810
5
5
  jupyter_ydoc/ybasedoc.py,sha256=c0jwhULtTNCjOYHbXhDhKaD6OJYn7hpL4hcLZWyGJsU,5115
6
6
  jupyter_ydoc/yblob.py,sha256=JZiXQhONqFS8Cqdglx__AVeS18gyRq0yHq-AQKFPVfw,2316
7
7
  jupyter_ydoc/yfile.py,sha256=XTMtAXDWgIOLU2KUQxkLJz2cGvSPlOxpvJc4daXCV6I,198
8
8
  jupyter_ydoc/ynotebook.py,sha256=Xplr1x57Y6gWAU84u-Krh9_Xkn_-AN-Vte4DJY5VOfw,13452
9
- jupyter_ydoc/yunicode.py,sha256=ZLNLTJoy75gxCwI8ZNBv_gD42hrzHfD-J6GOO1WaJIE,2574
10
- jupyter_ydoc-3.3.1.dist-info/METADATA,sha256=rimDKlYURSFVG1DFMI9t5gYrAnGmuesVmQNu0k8a6Qs,2282
11
- jupyter_ydoc-3.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- jupyter_ydoc-3.3.1.dist-info/entry_points.txt,sha256=lgvRG-rpsjRKf8cy7LpO7fqwwXy0sBVMCwhGOHgn4mc,164
13
- jupyter_ydoc-3.3.1.dist-info/licenses/LICENSE,sha256=dqphsFbhnlzPK7Vlkc66Zc7g7PS-e1dln07GXIVpFCQ,1567
14
- jupyter_ydoc-3.3.1.dist-info/RECORD,,
9
+ jupyter_ydoc/yunicode.py,sha256=gmWNvkItUlbHAtc0JeVdq2ffAUMi9gr9lE83vDuNotE,6491
10
+ jupyter_ydoc-3.3.3.dist-info/METADATA,sha256=gE7ys9Pdw3JCEmVT0s6QrVrd7fejgGvHnGVeHzp6pk4,2282
11
+ jupyter_ydoc-3.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
12
+ jupyter_ydoc-3.3.3.dist-info/entry_points.txt,sha256=lgvRG-rpsjRKf8cy7LpO7fqwwXy0sBVMCwhGOHgn4mc,164
13
+ jupyter_ydoc-3.3.3.dist-info/licenses/LICENSE,sha256=dqphsFbhnlzPK7Vlkc66Zc7g7PS-e1dln07GXIVpFCQ,1567
14
+ jupyter_ydoc-3.3.3.dist-info/RECORD,,
jupyter_ydoc-3.3.1.dist-info/WHEEL → jupyter_ydoc-3.3.3.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any