scanlib 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {scanlib-1.2.0/src/scanlib.egg-info → scanlib-1.3.0}/PKG-INFO +3 -3
  2. {scanlib-1.2.0 → scanlib-1.3.0}/README.md +2 -2
  3. {scanlib-1.2.0 → scanlib-1.3.0}/pyproject.toml +1 -1
  4. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/__init__.py +84 -25
  5. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/_mdns.py +117 -23
  6. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/_types.py +80 -3
  7. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/backends/_escl.py +21 -27
  8. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/backends/_macos.py +88 -6
  9. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/backends/_sane.py +28 -7
  10. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/backends/_wia.py +157 -3
  11. {scanlib-1.2.0 → scanlib-1.3.0/src/scanlib.egg-info}/PKG-INFO +3 -3
  12. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib.egg-info/SOURCES.txt +1 -0
  13. scanlib-1.3.0/tests/test_composite.py +143 -0
  14. {scanlib-1.2.0 → scanlib-1.3.0}/tests/test_mdns.py +114 -0
  15. {scanlib-1.2.0 → scanlib-1.3.0}/tests/test_types.py +88 -4
  16. {scanlib-1.2.0 → scanlib-1.3.0}/LICENSE +0 -0
  17. {scanlib-1.2.0 → scanlib-1.3.0}/setup.cfg +0 -0
  18. {scanlib-1.2.0 → scanlib-1.3.0}/setup.py +0 -0
  19. {scanlib-1.2.0 → scanlib-1.3.0}/src/accel/_scanlib_accel.c +0 -0
  20. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/__main__.py +0 -0
  21. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/_jpeg.py +0 -0
  22. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib/backends/__init__.py +0 -0
  23. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib.egg-info/dependency_links.txt +0 -0
  24. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib.egg-info/entry_points.txt +0 -0
  25. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib.egg-info/requires.txt +0 -0
  26. {scanlib-1.2.0 → scanlib-1.3.0}/src/scanlib.egg-info/top_level.txt +0 -0
  27. {scanlib-1.2.0 → scanlib-1.3.0}/tests/test_hardware.py +0 -0
  28. {scanlib-1.2.0 → scanlib-1.3.0}/tests/test_jpeg.py +0 -0
  29. {scanlib-1.2.0 → scanlib-1.3.0}/tests/test_pdf.py +0 -0
  30. {scanlib-1.2.0 → scanlib-1.3.0}/tests/test_resolve.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanlib
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: A multiplatform document scanning library for Python
5
5
  Author-email: Angelo Mottola <a.mottola@gmail.com>
6
6
  License: MIT
@@ -67,10 +67,10 @@ A multiplatform document scanning library for Python with platform-native scanni
67
67
  | Platform | Backend | Scanner types | eSCL | System packages |
68
68
  |---|---|---|---|---|
69
69
  | **macOS 10.7+** | ImageCaptureCore | USB + network | Opt-in (`SCANLIB_ESCL=1`) | None |
70
- | **Windows 10+** | WIA 2.0 | USB | Always enabled | None |
70
+ | **Windows 10+** | WIA 2.0 | USB + network | Always enabled | None |
71
71
  | **Linux** | SANE | USB | Always enabled | `libsane-dev libjpeg-dev` |
72
72
 
73
- The eSCL (AirScan) backend discovers and drives network scanners directly over HTTP — no OS-level scanner drivers needed. On Linux and Windows it runs alongside the platform backend automatically. On macOS, ImageCaptureCore already handles network scanners natively; set `SCANLIB_ESCL=1` to use the eSCL backend instead.
73
+ The eSCL (AirScan) backend discovers and drives network scanners directly over HTTP — no OS-level scanner drivers needed. On Linux and Windows it runs alongside the platform backend automatically, and a network scanner found by both is reported once (matched by device UUID or IP), preferring the platform driver. On macOS, ImageCaptureCore already handles network scanners natively, so eSCL is opt-in. Set `SCANLIB_ESCL=1` to enable eSCL on macOS, or — on any platform — to prefer the eSCL driver over the platform driver for scanners seen by both.
74
74
 
75
75
  ## Installation
76
76
 
@@ -23,10 +23,10 @@ A multiplatform document scanning library for Python with platform-native scanni
23
23
  | Platform | Backend | Scanner types | eSCL | System packages |
24
24
  |---|---|---|---|---|
25
25
  | **macOS 10.7+** | ImageCaptureCore | USB + network | Opt-in (`SCANLIB_ESCL=1`) | None |
26
- | **Windows 10+** | WIA 2.0 | USB | Always enabled | None |
26
+ | **Windows 10+** | WIA 2.0 | USB + network | Always enabled | None |
27
27
  | **Linux** | SANE | USB | Always enabled | `libsane-dev libjpeg-dev` |
28
28
 
29
- The eSCL (AirScan) backend discovers and drives network scanners directly over HTTP — no OS-level scanner drivers needed. On Linux and Windows it runs alongside the platform backend automatically. On macOS, ImageCaptureCore already handles network scanners natively; set `SCANLIB_ESCL=1` to use the eSCL backend instead.
29
+ The eSCL (AirScan) backend discovers and drives network scanners directly over HTTP — no OS-level scanner drivers needed. On Linux and Windows it runs alongside the platform backend automatically, and a network scanner found by both is reported once (matched by device UUID or IP), preferring the platform driver. On macOS, ImageCaptureCore already handles network scanners natively, so eSCL is opt-in. Set `SCANLIB_ESCL=1` to enable eSCL on macOS, or — on any platform — to prefer the eSCL driver over the platform driver for scanners seen by both.
30
30
 
31
31
  ## Installation
32
32
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scanlib"
7
- version = "1.2.0"
7
+ version = "1.3.0"
8
8
  description = "A multiplatform document scanning library for Python"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -23,6 +23,7 @@ from ._types import (
23
23
  ScanError,
24
24
  ScanLibError,
25
25
  Scanner,
26
+ ScannerBusyError,
26
27
  ScannerDefaults,
27
28
  ScannerNotOpenError,
28
29
  ScanOptions,
@@ -52,6 +53,7 @@ __all__ = [
52
53
  "ScanLibError",
53
54
  "ScanError",
54
55
  "ScanAborted",
56
+ "ScannerBusyError",
55
57
  "FeederEmptyError",
56
58
  "NoScannerFoundError",
57
59
  "BackendNotAvailableError",
@@ -63,19 +65,46 @@ __all__ = [
63
65
  # ---------------------------------------------------------------------------
64
66
 
65
67
 
68
+ def _await_thread(thread: threading.Thread, timeout: float) -> None:
69
+ """Wait up to *timeout* seconds for *thread* to finish.
70
+
71
+ On macOS this delegates to the backend's run-loop-aware wait so the
72
+ ImageCaptureCore platform backend — whose discovery callbacks arrive
73
+ on the main thread's run loop — isn't starved. Everywhere else a
74
+ plain blocking join is correct.
75
+ """
76
+ if sys.platform == "darwin":
77
+ from .backends._macos import await_thread
78
+
79
+ await_thread(thread, timeout)
80
+ else:
81
+ thread.join(timeout)
82
+
83
+
66
84
  class _CompositeBackend:
67
85
  """Merges results from a platform backend and the eSCL backend.
68
86
 
69
87
  ``list_scanners`` runs both backends' discovery in parallel and
70
- deduplicates by IP address. Each scanner's ``_backend_impl`` points
71
- to whichever backend discovered it.
88
+ deduplicates: a platform scanner and an eSCL scanner are the same
89
+ physical device when their UUIDs match or their IPs match. Each
90
+ scanner's ``_backend_impl`` points to whichever backend discovered it.
91
+
92
+ *prefer_escl* decides which entry survives a duplicate. On Linux and
93
+ Windows the platform driver is preferred (the platform backend is
94
+ always on), so the eSCL duplicate is dropped. On macOS the eSCL
95
+ backend is opt-in (``SCANLIB_ESCL=1``); enabling it signals a
96
+ preference for the eSCL driver, so there the platform duplicate is
97
+ dropped instead.
72
98
  """
73
99
 
74
- def __init__(self, platform_backend: ScanBackend) -> None:
100
+ def __init__(
101
+ self, platform_backend: ScanBackend, *, prefer_escl: bool = False
102
+ ) -> None:
75
103
  from .backends._escl import EsclBackend
76
104
 
77
105
  self._platform = platform_backend
78
106
  self._escl = EsclBackend()
107
+ self._prefer_escl = prefer_escl
79
108
 
80
109
  def list_scanners(
81
110
  self,
@@ -105,12 +134,14 @@ class _CompositeBackend:
105
134
  t_platform.start()
106
135
  t_escl.start()
107
136
 
108
- # Wait for eSCL first (fast, ~4s max), then give the platform
137
+ # Wait for eSCL first (fast, ~6s max), then give the platform
109
138
  # backend a short grace period to finish. Don't block on a
110
- # slow platform backend when eSCL already has results.
111
- t_escl.join(timeout=timeout + 2)
139
+ # slow platform backend when eSCL already has results. _await_thread
140
+ # pumps the macOS run loop while waiting (see its docstring) so the
141
+ # ImageCaptureCore platform backend isn't starved.
142
+ _await_thread(t_escl, timeout + 2)
112
143
  if t_platform.is_alive():
113
- t_platform.join(timeout=2.0)
144
+ _await_thread(t_platform, 2.0)
114
145
 
115
146
  if cancel is not None and cancel.is_set():
116
147
  return []
@@ -120,25 +151,46 @@ class _CompositeBackend:
120
151
 
121
152
  if not escl_scanners:
122
153
  return platform_scanners
154
+ if not platform_scanners:
155
+ return escl_scanners
123
156
 
124
- # Collect IPs from platform scanners for deduplication
125
157
  from ._mdns import extract_ip_from_uri
126
158
 
127
- platform_ips: set[str] = set()
128
- for s in platform_scanners:
129
- ip = extract_ip_from_uri(s.name)
130
- if ip:
131
- platform_ips.add(ip)
132
-
133
- # Add eSCL scanners not already found by the platform backend
159
+ # A platform scanner and an eSCL scanner are the same physical
160
+ # device when their UUIDs match or their IPs match. The eSCL IP
161
+ # comes from mDNS (keyed by scanner id); the platform IP, when it
162
+ # has one, is parsed out of its name (e.g. a SANE ``escl:`` URI).
163
+ # On macOS the platform scanner exposes only a UUID, on Windows a
164
+ # WSD scanner exposes a UUID, on Linux a network SANE device
165
+ # exposes an IP so matching on either key covers them all. The
166
+ # identity sets never contain None, so a missing UUID/IP simply
167
+ # never matches (no explicit guard needed).
134
168
  escl_ips = self._escl.get_scanner_ips()
135
- for s in escl_scanners:
136
- ip = escl_ips.get(s.id)
137
- if ip and ip in platform_ips:
138
- continue # already discovered by platform backend
139
- platform_scanners.append(s)
140
169
 
141
- return platform_scanners
170
+ if self._prefer_escl:
171
+ # Keep every eSCL entry; drop the platform duplicates.
172
+ escl_uuids = {s.uuid for s in escl_scanners if s.uuid}
173
+ escl_ip_set = set(escl_ips.values())
174
+ kept = [
175
+ s
176
+ for s in platform_scanners
177
+ if s.uuid not in escl_uuids
178
+ and extract_ip_from_uri(s.name) not in escl_ip_set
179
+ ]
180
+ return kept + escl_scanners
181
+
182
+ # Keep every platform entry; drop the eSCL duplicates.
183
+ platform_uuids = {s.uuid for s in platform_scanners if s.uuid}
184
+ platform_ip_set = {
185
+ ip for s in platform_scanners if (ip := extract_ip_from_uri(s.name))
186
+ }
187
+ kept = [
188
+ s
189
+ for s in escl_scanners
190
+ if s.uuid not in platform_uuids
191
+ and escl_ips.get(s.id) not in platform_ip_set
192
+ ]
193
+ return platform_scanners + kept
142
194
 
143
195
  # Delegate remaining methods to the scanner's own _backend_impl
144
196
  def open_scanner(self, scanner: Scanner) -> None:
@@ -169,23 +221,30 @@ def _get_backend() -> ScanBackend:
169
221
  if _backend is not None:
170
222
  return _backend
171
223
 
224
+ # Setting SCANLIB_ESCL=1 signals a preference for the eSCL driver over
225
+ # the platform driver when both discover the same device.
226
+ prefer_escl = os.environ.get("SCANLIB_ESCL", "").strip() == "1"
227
+
172
228
  if sys.platform == "linux":
173
229
  from .backends._sane import SaneBackend
174
230
 
175
- _backend = _CompositeBackend(SaneBackend())
231
+ _backend = _CompositeBackend(SaneBackend(), prefer_escl=prefer_escl)
176
232
 
177
233
  elif sys.platform == "darwin":
178
234
  from .backends._macos import MacOSBackend
179
235
 
180
- if os.environ.get("SCANLIB_ESCL", "").strip() == "1":
181
- _backend = _CompositeBackend(MacOSBackend())
236
+ # On macOS the eSCL backend is opt-in (ImageCaptureCore already
237
+ # handles eSCL natively), so the composite is only used when
238
+ # SCANLIB_ESCL=1 — and enabling it implies preferring eSCL.
239
+ if prefer_escl:
240
+ _backend = _CompositeBackend(MacOSBackend(), prefer_escl=True)
182
241
  else:
183
242
  _backend = MacOSBackend()
184
243
 
185
244
  elif sys.platform == "win32":
186
245
  from .backends._wia import WiaBackend
187
246
 
188
- _backend = _CompositeBackend(WiaBackend())
247
+ _backend = _CompositeBackend(WiaBackend(), prefer_escl=prefer_escl)
189
248
 
190
249
  else:
191
250
  raise BackendNotAvailableError(f"Unsupported platform: {sys.platform}")
@@ -33,6 +33,8 @@ _SERVICE_TYPES = (
33
33
  "_uscan._tcp.local.",
34
34
  "_uscans._tcp.local.",
35
35
  )
36
+ # Normalized (no trailing dot, lower-case) set for matching PTR owner names.
37
+ _SERVICE_TYPE_SET = frozenset(s.rstrip(".").lower() for s in _SERVICE_TYPES)
36
38
 
37
39
 
38
40
  @dataclasses.dataclass
@@ -189,8 +191,15 @@ def _parse_responses(
189
191
  offset += rdlength
190
192
 
191
193
  if rtype == _TYPE_PTR:
192
- target, _ = _read_name(data, rdstart)
193
- ptrs.append((name, target))
194
+ # Only accept PTRs for the scanner service types we queried.
195
+ # We bind the mDNS port and join the multicast group, so the
196
+ # socket also receives unrelated service announcements from
197
+ # other devices on the LAN (AirPlay, screen sharing on
198
+ # _rfb._tcp, _companion-link, …); without this filter those
199
+ # iPhones/Macs/VMs get misreported as scanners.
200
+ if name.rstrip(".").lower() in _SERVICE_TYPE_SET:
201
+ target, _ = _read_name(data, rdstart)
202
+ ptrs.append((name, target))
194
203
  elif rtype == _TYPE_TXT:
195
204
  txts[name] = _parse_txt(data, rdstart, rdlength)
196
205
  elif rtype == _TYPE_A and rdlength == 4:
@@ -251,6 +260,57 @@ def _local_ipv4_addresses() -> list[str]:
251
260
  return unique
252
261
 
253
262
 
263
+ def _resolve_srv_addresses(result: _BrowseResult) -> None:
264
+ """Copy SRV-target addresses onto their service instance names.
265
+
266
+ mDNS often carries the A/AAAA records under the target hostname
267
+ rather than the service instance name, and the two may arrive in
268
+ separate packets. This links them on the accumulated result so a
269
+ service instance resolves to an IP even across packets.
270
+ """
271
+ for sname, srv in result.srvs.items():
272
+ if not result.addrs.get(sname) and result.addrs.get(srv.target):
273
+ result.addrs[sname] = list(result.addrs[srv.target])
274
+
275
+
276
+ def _is_routable_address(addr: str) -> bool:
277
+ """False for addresses that cannot be used as a connection target.
278
+
279
+ Skips IPv6 link-local (``fe80::/10``), which require a ``%zone`` scope
280
+ id we do not track and so are not connectable, and IPv4 link-local
281
+ (``169.254.0.0/16``). Some scanners advertise an extra service that
282
+ only resolves to a link-local address; surfacing it produces an
283
+ unusable, duplicate scanner entry.
284
+ """
285
+ a = addr.lower()
286
+ if a.startswith("169.254."):
287
+ return False
288
+ # IPv6 link-local fe80::/10 spans the fe80–febf prefixes.
289
+ if ":" in a and a[:3] in ("fe8", "fe9", "fea", "feb"):
290
+ return False
291
+ return True
292
+
293
+
294
+ def _best_address(addrs: list[str]) -> str | None:
295
+ """Pick the best connectable address, or ``None`` if there is none.
296
+
297
+ Prefers IPv4 (simpler and unambiguous in an ``escl:IP:PORT`` id) and
298
+ skips link-local addresses.
299
+ """
300
+ routable = [a for a in addrs if _is_routable_address(a)]
301
+ ipv4 = [a for a in routable if ":" not in a]
302
+ if ipv4:
303
+ return ipv4[0]
304
+ return routable[0] if routable else None
305
+
306
+
307
+ def _resolvable_instances(result: _BrowseResult) -> list[str]:
308
+ """Return PTR instance names that resolve to a usable (routable) IP."""
309
+ return [
310
+ inst for _svc, inst in result.ptrs if _best_address(result.addrs.get(inst, []))
311
+ ]
312
+
313
+
254
314
  def _browse_mdns(timeout: float = 4.0) -> _BrowseResult:
255
315
  """Send mDNS queries and collect responses.
256
316
 
@@ -305,34 +365,59 @@ def _browse_mdns(timeout: float = 4.0) -> _BrowseResult:
305
365
 
306
366
  sock.setblocking(False)
307
367
 
308
- # Send PTR query. On Windows, send on each interface so
309
- # scanners on any subnet see it.
310
368
  query = _build_query(*_SERVICE_TYPES)
311
- if local_addrs:
312
- for addr in local_addrs:
369
+
370
+ def _send_query() -> None:
371
+ # On Windows, send on each interface so scanners on any
372
+ # subnet see the query.
373
+ if local_addrs:
374
+ for addr in local_addrs:
375
+ try:
376
+ sock.setsockopt(
377
+ socket.IPPROTO_IP,
378
+ socket.IP_MULTICAST_IF,
379
+ socket.inet_aton(addr),
380
+ )
381
+ sock.sendto(query, (_MDNS_ADDR, _MDNS_PORT))
382
+ except OSError:
383
+ pass
384
+ else:
313
385
  try:
314
- sock.setsockopt(
315
- socket.IPPROTO_IP,
316
- socket.IP_MULTICAST_IF,
317
- socket.inet_aton(addr),
318
- )
319
386
  sock.sendto(query, (_MDNS_ADDR, _MDNS_PORT))
320
387
  except OSError:
321
388
  pass
322
- else:
323
- sock.sendto(query, (_MDNS_ADDR, _MDNS_PORT))
324
389
 
325
- # Collect responses with early exit: once we've received at
326
- # least one response, stop after a short quiet period (no new
327
- # packets) rather than waiting the full timeout.
390
+ # Retransmit the query with growing intervals until a response
391
+ # arrives or the deadline passes: multicast is unreliable
392
+ # (especially over Wi-Fi) and a single lost query/response — or a
393
+ # responder transiently suppressing a duplicate question — would
394
+ # otherwise yield nothing for the full timeout. Standard mDNS
395
+ # one-shot queriers retransmit (RFC 6762 §5.2). Continuing to
396
+ # probe across the whole window also gives a sleeping scanner time
397
+ # to wake and answer. Once a response arrives, stop retransmitting
398
+ # and linger for a short quiet period to gather additional records.
328
399
  quiet_period = 0.5
329
400
  got_response = False
330
- deadline = time.monotonic() + timeout
401
+ start = time.monotonic()
402
+ deadline = start + timeout
403
+ send_interval = 0.25
404
+ next_send_at = 0.0 # seconds since start
331
405
  while True:
332
- remaining = deadline - time.monotonic()
406
+ now = time.monotonic()
407
+ remaining = deadline - now
333
408
  if remaining <= 0:
334
409
  break
335
- wait = min(remaining, quiet_period) if got_response else remaining
410
+ elapsed = now - start
411
+ # Fire the next (re)transmission when due, then schedule the
412
+ # following one a little further out (capped at 1s).
413
+ if not got_response and elapsed >= next_send_at:
414
+ _send_query()
415
+ next_send_at = elapsed + send_interval
416
+ send_interval = min(send_interval * 2, 1.0)
417
+ if got_response:
418
+ wait = min(remaining, quiet_period)
419
+ else:
420
+ wait = max(0.0, min(remaining, next_send_at - elapsed))
336
421
  readable, _, _ = select.select([sock], [], [], wait)
337
422
  if not readable:
338
423
  if got_response:
@@ -348,7 +433,12 @@ def _browse_mdns(timeout: float = 4.0) -> _BrowseResult:
348
433
  for k, v in addrs.items():
349
434
  result.addrs.setdefault(k, []).extend(v)
350
435
  result.srvs.update(srvs)
351
- if ptrs:
436
+ # Only treat the browse as answered once we have a service
437
+ # instance that actually resolves to an IP — a bare PTR with
438
+ # the address records still in flight is not enough, and
439
+ # exiting on it would drop the scanner.
440
+ _resolve_srv_addresses(result)
441
+ if _resolvable_instances(result):
352
442
  got_response = True
353
443
 
354
444
  except OSError:
@@ -356,6 +446,7 @@ def _browse_mdns(timeout: float = 4.0) -> _BrowseResult:
356
446
  finally:
357
447
  sock.close()
358
448
 
449
+ _resolve_srv_addresses(result)
359
450
  return result
360
451
 
361
452
 
@@ -408,8 +499,12 @@ def discover_escl_services(timeout: float = 4.0) -> list[EsclServiceInfo]:
408
499
  for svc_type, instance_name in browse.ptrs:
409
500
  txt = browse.txts.get(instance_name, {})
410
501
  srv = browse.srvs.get(instance_name)
411
- ips = browse.addrs.get(instance_name, [])
412
- if not ips:
502
+ # Pick a usable address: prefer IPv4 and skip link-local addresses.
503
+ # A service that resolves only to a link-local address is not
504
+ # connectable (no %zone scope) and would otherwise show up as a
505
+ # broken, duplicate scanner entry.
506
+ ip = _best_address(browse.addrs.get(instance_name, []))
507
+ if ip is None:
413
508
  continue
414
509
 
415
510
  tls = "_uscans._tcp" in svc_type
@@ -425,7 +520,6 @@ def discover_escl_services(timeout: float = 4.0) -> list[EsclServiceInfo]:
425
520
  continue
426
521
  seen_uuids.add(uuid)
427
522
 
428
- ip = ips[0]
429
523
  if not uuid and ip in seen_ips:
430
524
  continue
431
525
  seen_ips.add(ip)
@@ -35,6 +35,18 @@ class FeederEmptyError(ScanError):
35
35
  """The document feeder has no pages to scan."""
36
36
 
37
37
 
38
+ class ScannerBusyError(ScanError):
39
+ """The scanner is in use by another session or application."""
40
+
41
+ _DEFAULT = (
42
+ "Scanner is in use by another session or application. "
43
+ "Close it and try again."
44
+ )
45
+
46
+ def __init__(self, message: str = _DEFAULT) -> None:
47
+ super().__init__(message)
48
+
49
+
38
50
  class ScannerNotOpenError(ScanLibError):
39
51
  """Operation requires an open scanner session."""
40
52
 
@@ -267,6 +279,8 @@ class Scanner:
267
279
  *,
268
280
  scanner_id: str | None = None,
269
281
  location: str | None = None,
282
+ uuid: str | None = None,
283
+ display_name: str | None = None,
270
284
  _backend_impl: ScanBackend | None = None,
271
285
  ) -> None:
272
286
  self._name = name
@@ -275,6 +289,11 @@ class Scanner:
275
289
  self._backend = backend
276
290
  self._id = scanner_id if scanner_id is not None else name
277
291
  self._location = location
292
+ self._uuid = uuid.lower() if uuid else None
293
+ # OS-matching display label computed by the discovering backend
294
+ # (e.g. "vendor model" on SANE, the native device name elsewhere).
295
+ # str(scanner) uses it; None falls back to the generic logic below.
296
+ self._display_name = display_name
278
297
  self._backend_impl = _backend_impl
279
298
  self._sources: list[SourceInfo] = []
280
299
  self._defaults: ScannerDefaults | None = None
@@ -303,13 +322,34 @@ class Scanner:
303
322
  """
304
323
  return self._id
305
324
 
325
+ @property
326
+ def uuid(self) -> str | None:
327
+ """Lower-cased device UUID, or ``None`` when not available.
328
+
329
+ Populated on macOS (where it equals :attr:`id`), on WIA for
330
+ network/WSD scanners (extracted from the WSD port name), and on
331
+ eSCL (from the mDNS ``UUID`` record). ``None`` on SANE. Used by
332
+ the composite backend to recognise that a platform scanner and an
333
+ eSCL scanner are the same physical device.
334
+ """
335
+ return self._uuid
336
+
306
337
  def __str__(self) -> str:
307
- """Human-readable scanner label suitable for UI display."""
338
+ """Human-readable scanner label suitable for UI display.
339
+
340
+ Returns the device's ``location`` when one is set; otherwise the
341
+ backend-computed ``display_name``, which matches what the OS shows
342
+ as the scanner's name (``vendor model`` on SANE, the native device
343
+ name on macOS/WIA/eSCL). Falls back to a generic best-effort label
344
+ when no ``display_name`` was supplied (e.g. a scanner opened by id).
345
+ """
308
346
  if self._location:
309
347
  return self._location
348
+ if self._display_name:
349
+ return self._display_name
310
350
  if self._vendor and self._model:
311
351
  return f"{self._vendor} {self._model}"
312
- return self._vendor or self._model or self._name
352
+ return self._name or self._vendor or self._model
313
353
 
314
354
  @property
315
355
  def vendor(self) -> str | None:
@@ -511,7 +551,11 @@ class Scanner:
511
551
  for page in self._backend_impl.scan_pages(self, options):
512
552
  if self._abort_event.is_set():
513
553
  raise ScanAborted("Scan aborted")
514
- yield page
554
+ # Normalise to the requested color mode: scanners may
555
+ # return a richer mode than asked for (e.g. RGB when
556
+ # grayscale was requested), so down-convert here once for
557
+ # every backend rather than relying on each one.
558
+ yield _coerce_color_mode(page, options.color_mode, options.bw_threshold)
515
559
  page_count += 1
516
560
  # Feeder: backend loops internally until empty; one call is enough.
517
561
  # Flatbed: backend scans one round; ask next_page to continue.
@@ -678,6 +722,39 @@ def wait_or_cancel(
678
722
  return True
679
723
 
680
724
 
725
+ # Color-mode "richness" ordering: COLOR carries the most information,
726
+ # BW the least. We can down-convert (drop information) but never
727
+ # up-convert (fabricate channels we were not given).
728
+ _COLOR_MODE_RANK = {ColorMode.BW: 0, ColorMode.GRAY: 1, ColorMode.COLOR: 2}
729
+
730
+
731
+ def _coerce_color_mode(
732
+ page: ScannedPage, target: ColorMode, bw_threshold: int = 128
733
+ ) -> ScannedPage:
734
+ """Down-convert *page* to the *target* color mode if it is richer.
735
+
736
+ Scanners do not always honour the requested color mode — some eSCL
737
+ devices return RGB even when grayscale is requested, for example.
738
+ This normalises a page to the mode the caller asked for: COLOR→GRAY
739
+ via luminance, GRAY/COLOR→BW via threshold. Pages already at (or
740
+ below) the target richness are returned unchanged.
741
+ """
742
+ if _COLOR_MODE_RANK[page.color_mode] <= _COLOR_MODE_RANK[target]:
743
+ return page
744
+
745
+ from _scanlib_accel import gray_to_bw, rgb_to_gray
746
+
747
+ data = page.data
748
+ mode = page.color_mode
749
+ if mode == ColorMode.COLOR:
750
+ data = rgb_to_gray(data, page.width, page.height)
751
+ mode = ColorMode.GRAY
752
+ if target == ColorMode.BW and mode == ColorMode.GRAY:
753
+ data = gray_to_bw(data, page.width, page.height, bw_threshold)
754
+ mode = ColorMode.BW
755
+ return ScannedPage(data=data, width=page.width, height=page.height, color_mode=mode)
756
+
757
+
681
758
  def build_pdf(
682
759
  pages: Iterable[ScannedPage],
683
760
  *,
@@ -34,6 +34,7 @@ from .._types import (
34
34
  ScanAborted,
35
35
  ScanArea,
36
36
  ScanError,
37
+ ScannerBusyError,
37
38
  ScannedPage,
38
39
  Scanner,
39
40
  ScannerDefaults,
@@ -210,8 +211,9 @@ class _EsclConnection:
210
211
  if resp.status == 409
211
212
  else "unavailable (HTTP 503)"
212
213
  )
213
- raise ScanError(
214
- f"Scanner {reason} — " f"still not ready after {retries} attempts"
214
+ raise ScannerBusyError(
215
+ f"Scanner {reason} — in use by another session or "
216
+ f"application; still not ready after {retries} attempts."
215
217
  )
216
218
  if resp.status not in (200, 201):
217
219
  raise ScanError(f"ScanJobs POST returned {resp.status}")
@@ -484,8 +486,8 @@ def _build_scan_settings(
484
486
  res_el2.text = str(options.dpi)
485
487
 
486
488
  # Color mode — request Grayscale8 for BW since many scanners don't
487
- # support BlackAndWhite1 via eSCL; client-side conversion to 1-bit
488
- # is handled by _decode_scan_response.
489
+ # support BlackAndWhite1 via eSCL; the client-side conversion to 1-bit
490
+ # is handled centrally in Scanner.scan_pages (via _coerce_color_mode).
489
491
  escl_color = _COLOR_MODE_TO_ESCL.get(options.color_mode, "RGB24")
490
492
  if escl_color == "BlackAndWhite1":
491
493
  escl_color = "Grayscale8"
@@ -597,13 +599,15 @@ def _decode_pdf_jpeg(data: bytes) -> tuple[bytes, int, int, int]:
597
599
 
598
600
  def _decode_scan_response(
599
601
  data: bytes,
600
- color_mode: ColorMode,
601
602
  content_type: str = "image/jpeg",
602
- bw_threshold: int = 128,
603
603
  ) -> ScannedPage:
604
- """Decode scanner response into a ScannedPage.
604
+ """Decode a scanner response into a ScannedPage in its native mode.
605
605
 
606
606
  Handles JPEG, PNG, and PDF (extracts first image from PDF) responses.
607
+ The page is returned in whatever mode the scanner actually sent
608
+ (grayscale or color); coercion to the requested color mode — which
609
+ some scanners ignore — is handled centrally in ``Scanner.scan_pages``
610
+ so the conversion happens at most once and only when needed.
607
611
  """
608
612
  ct = content_type.split(";")[0].strip().lower()
609
613
 
@@ -615,19 +619,7 @@ def _decode_scan_response(
615
619
  # Default: treat as JPEG (covers image/jpeg and unknown types)
616
620
  raw_pixels, width, height, components = decode_jpeg(data)
617
621
 
618
- if components == 1:
619
- actual_mode = ColorMode.GRAY
620
- else:
621
- actual_mode = ColorMode.COLOR
622
-
623
- # If BW was requested, convert grayscale to 1-bit
624
- if color_mode == ColorMode.BW and actual_mode != ColorMode.BW:
625
- from _scanlib_accel import gray_to_bw, rgb_to_gray
626
-
627
- if actual_mode == ColorMode.COLOR:
628
- raw_pixels = rgb_to_gray(raw_pixels, width, height)
629
- raw_pixels = gray_to_bw(raw_pixels, width, height, bw_threshold)
630
- actual_mode = ColorMode.BW
622
+ actual_mode = ColorMode.GRAY if components == 1 else ColorMode.COLOR
631
623
 
632
624
  return ScannedPage(
633
625
  data=raw_pixels,
@@ -684,7 +676,11 @@ class EsclBackend:
684
676
  timeout: float = DISCOVERY_TIMEOUT,
685
677
  cancel: threading.Event | None = None,
686
678
  ) -> list[Scanner]:
687
- services = discover_escl_services(timeout=min(timeout, 4.0))
679
+ # Cap the discovery window: _browse_mdns early-exits as soon as a
680
+ # scanner resolves, so this only bounds the no-scanner case. 6s
681
+ # leaves enough room for a cold/sleeping scanner (or a lossy Wi-Fi
682
+ # link) to answer one of the retransmitted queries.
683
+ services = discover_escl_services(timeout=min(timeout, 6.0))
688
684
  scanners: list[Scanner] = []
689
685
 
690
686
  for svc in services:
@@ -702,7 +698,10 @@ class EsclBackend:
702
698
  model=None,
703
699
  backend=self.backend_name,
704
700
  scanner_id=scanner_id,
701
+ uuid=svc.uuid,
705
702
  location=svc.note,
703
+ # AirScan/AirPrint UIs show the mDNS `ty` record (name).
704
+ display_name=svc.name,
706
705
  _backend_impl=self,
707
706
  )
708
707
  )
@@ -803,12 +802,7 @@ class EsclBackend:
803
802
  doc_data, content_type = result
804
803
  page_num += 1
805
804
  try:
806
- page = _decode_scan_response(
807
- doc_data,
808
- options.color_mode,
809
- content_type,
810
- options.bw_threshold,
811
- )
805
+ page = _decode_scan_response(doc_data, content_type)
812
806
  except Exception as exc:
813
807
  raise ScanError(f"Failed to decode scan data: {exc}") from exc
814
808