scanlib 1.2.0__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {scanlib-1.2.0/src/scanlib.egg-info → scanlib-1.3.1}/PKG-INFO +3 -3
  2. {scanlib-1.2.0 → scanlib-1.3.1}/README.md +2 -2
  3. {scanlib-1.2.0 → scanlib-1.3.1}/pyproject.toml +1 -1
  4. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/__init__.py +86 -25
  5. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/_mdns.py +117 -23
  6. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/_types.py +98 -3
  7. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/backends/_escl.py +75 -32
  8. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/backends/_macos.py +257 -74
  9. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/backends/_sane.py +67 -19
  10. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/backends/_wia.py +262 -45
  11. {scanlib-1.2.0 → scanlib-1.3.1/src/scanlib.egg-info}/PKG-INFO +3 -3
  12. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib.egg-info/SOURCES.txt +1 -0
  13. scanlib-1.3.1/tests/test_composite.py +143 -0
  14. {scanlib-1.2.0 → scanlib-1.3.1}/tests/test_mdns.py +114 -0
  15. {scanlib-1.2.0 → scanlib-1.3.1}/tests/test_types.py +88 -4
  16. {scanlib-1.2.0 → scanlib-1.3.1}/LICENSE +0 -0
  17. {scanlib-1.2.0 → scanlib-1.3.1}/setup.cfg +0 -0
  18. {scanlib-1.2.0 → scanlib-1.3.1}/setup.py +0 -0
  19. {scanlib-1.2.0 → scanlib-1.3.1}/src/accel/_scanlib_accel.c +0 -0
  20. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/__main__.py +0 -0
  21. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/_jpeg.py +0 -0
  22. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib/backends/__init__.py +0 -0
  23. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib.egg-info/dependency_links.txt +0 -0
  24. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib.egg-info/entry_points.txt +0 -0
  25. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib.egg-info/requires.txt +0 -0
  26. {scanlib-1.2.0 → scanlib-1.3.1}/src/scanlib.egg-info/top_level.txt +0 -0
  27. {scanlib-1.2.0 → scanlib-1.3.1}/tests/test_hardware.py +0 -0
  28. {scanlib-1.2.0 → scanlib-1.3.1}/tests/test_jpeg.py +0 -0
  29. {scanlib-1.2.0 → scanlib-1.3.1}/tests/test_pdf.py +0 -0
  30. {scanlib-1.2.0 → scanlib-1.3.1}/tests/test_resolve.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanlib
3
- Version: 1.2.0
3
+ Version: 1.3.1
4
4
  Summary: A multiplatform document scanning library for Python
5
5
  Author-email: Angelo Mottola <a.mottola@gmail.com>
6
6
  License: MIT
@@ -67,10 +67,10 @@ A multiplatform document scanning library for Python with platform-native scanni
67
67
  | Platform | Backend | Scanner types | eSCL | System packages |
68
68
  |---|---|---|---|---|
69
69
  | **macOS 10.7+** | ImageCaptureCore | USB + network | Opt-in (`SCANLIB_ESCL=1`) | None |
70
- | **Windows 10+** | WIA 2.0 | USB | Always enabled | None |
70
+ | **Windows 10+** | WIA 2.0 | USB + network | Always enabled | None |
71
71
  | **Linux** | SANE | USB | Always enabled | `libsane-dev libjpeg-dev` |
72
72
 
73
- The eSCL (AirScan) backend discovers and drives network scanners directly over HTTP — no OS-level scanner drivers needed. On Linux and Windows it runs alongside the platform backend automatically. On macOS, ImageCaptureCore already handles network scanners natively; set `SCANLIB_ESCL=1` to use the eSCL backend instead.
73
+ The eSCL (AirScan) backend discovers and drives network scanners directly over HTTP — no OS-level scanner drivers needed. On Linux and Windows it runs alongside the platform backend automatically, and a network scanner found by both is reported once (matched by device UUID or IP), preferring the platform driver. On macOS, ImageCaptureCore already handles network scanners natively, so eSCL is opt-in. Set `SCANLIB_ESCL=1` to enable eSCL on macOS, or — on any platform — to prefer the eSCL driver over the platform driver for scanners seen by both.
74
74
 
75
75
  ## Installation
76
76
 
@@ -23,10 +23,10 @@ A multiplatform document scanning library for Python with platform-native scanni
23
23
  | Platform | Backend | Scanner types | eSCL | System packages |
24
24
  |---|---|---|---|---|
25
25
  | **macOS 10.7+** | ImageCaptureCore | USB + network | Opt-in (`SCANLIB_ESCL=1`) | None |
26
- | **Windows 10+** | WIA 2.0 | USB | Always enabled | None |
26
+ | **Windows 10+** | WIA 2.0 | USB + network | Always enabled | None |
27
27
  | **Linux** | SANE | USB | Always enabled | `libsane-dev libjpeg-dev` |
28
28
 
29
- The eSCL (AirScan) backend discovers and drives network scanners directly over HTTP — no OS-level scanner drivers needed. On Linux and Windows it runs alongside the platform backend automatically. On macOS, ImageCaptureCore already handles network scanners natively; set `SCANLIB_ESCL=1` to use the eSCL backend instead.
29
+ The eSCL (AirScan) backend discovers and drives network scanners directly over HTTP — no OS-level scanner drivers needed. On Linux and Windows it runs alongside the platform backend automatically, and a network scanner found by both is reported once (matched by device UUID or IP), preferring the platform driver. On macOS, ImageCaptureCore already handles network scanners natively, so eSCL is opt-in. Set `SCANLIB_ESCL=1` to enable eSCL on macOS, or — on any platform — to prefer the eSCL driver over the platform driver for scanners seen by both.
30
30
 
31
31
  ## Installation
32
32
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scanlib"
7
- version = "1.2.0"
7
+ version = "1.3.1"
8
8
  description = "A multiplatform document scanning library for Python"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -23,8 +23,10 @@ from ._types import (
23
23
  ScanError,
24
24
  ScanLibError,
25
25
  Scanner,
26
+ ScannerBusyError,
26
27
  ScannerDefaults,
27
28
  ScannerNotOpenError,
29
+ ScannerUnavailableError,
28
30
  ScanOptions,
29
31
  ScanSource,
30
32
  ScannedDocument,
@@ -52,6 +54,8 @@ __all__ = [
52
54
  "ScanLibError",
53
55
  "ScanError",
54
56
  "ScanAborted",
57
+ "ScannerBusyError",
58
+ "ScannerUnavailableError",
55
59
  "FeederEmptyError",
56
60
  "NoScannerFoundError",
57
61
  "BackendNotAvailableError",
@@ -63,19 +67,46 @@ __all__ = [
63
67
  # ---------------------------------------------------------------------------
64
68
 
65
69
 
70
+ def _await_thread(thread: threading.Thread, timeout: float) -> None:
71
+ """Wait up to *timeout* seconds for *thread* to finish.
72
+
73
+ On macOS this delegates to the backend's run-loop-aware wait so the
74
+ ImageCaptureCore platform backend — whose discovery callbacks arrive
75
+ on the main thread's run loop — isn't starved. Everywhere else a
76
+ plain blocking join is correct.
77
+ """
78
+ if sys.platform == "darwin":
79
+ from .backends._macos import await_thread
80
+
81
+ await_thread(thread, timeout)
82
+ else:
83
+ thread.join(timeout)
84
+
85
+
66
86
  class _CompositeBackend:
67
87
  """Merges results from a platform backend and the eSCL backend.
68
88
 
69
89
  ``list_scanners`` runs both backends' discovery in parallel and
70
- deduplicates by IP address. Each scanner's ``_backend_impl`` points
71
- to whichever backend discovered it.
90
+ deduplicates: a platform scanner and an eSCL scanner are the same
91
+ physical device when their UUIDs match or their IPs match. Each
92
+ scanner's ``_backend_impl`` points to whichever backend discovered it.
93
+
94
+ *prefer_escl* decides which entry survives a duplicate. By default
95
+ (Linux and Windows, where the platform backend is always on) the
96
+ platform driver is preferred, so the eSCL duplicate is dropped.
97
+ Setting ``SCANLIB_ESCL=1`` signals a preference for the eSCL driver
98
+ on any platform, so the platform duplicate is dropped instead; on
99
+ macOS the composite is only used at all when that variable is set.
72
100
  """
73
101
 
74
- def __init__(self, platform_backend: ScanBackend) -> None:
102
+ def __init__(
103
+ self, platform_backend: ScanBackend, *, prefer_escl: bool = False
104
+ ) -> None:
75
105
  from .backends._escl import EsclBackend
76
106
 
77
107
  self._platform = platform_backend
78
108
  self._escl = EsclBackend()
109
+ self._prefer_escl = prefer_escl
79
110
 
80
111
  def list_scanners(
81
112
  self,
@@ -105,12 +136,14 @@ class _CompositeBackend:
105
136
  t_platform.start()
106
137
  t_escl.start()
107
138
 
108
- # Wait for eSCL first (fast, ~4s max), then give the platform
139
+ # Wait for eSCL first (fast, ~6s max), then give the platform
109
140
  # backend a short grace period to finish. Don't block on a
110
- # slow platform backend when eSCL already has results.
111
- t_escl.join(timeout=timeout + 2)
141
+ # slow platform backend when eSCL already has results. _await_thread
142
+ # pumps the macOS run loop while waiting (see its docstring) so the
143
+ # ImageCaptureCore platform backend isn't starved.
144
+ _await_thread(t_escl, timeout + 2)
112
145
  if t_platform.is_alive():
113
- t_platform.join(timeout=2.0)
146
+ _await_thread(t_platform, 2.0)
114
147
 
115
148
  if cancel is not None and cancel.is_set():
116
149
  return []
@@ -120,25 +153,46 @@ class _CompositeBackend:
120
153
 
121
154
  if not escl_scanners:
122
155
  return platform_scanners
156
+ if not platform_scanners:
157
+ return escl_scanners
123
158
 
124
- # Collect IPs from platform scanners for deduplication
125
159
  from ._mdns import extract_ip_from_uri
126
160
 
127
- platform_ips: set[str] = set()
128
- for s in platform_scanners:
129
- ip = extract_ip_from_uri(s.name)
130
- if ip:
131
- platform_ips.add(ip)
132
-
133
- # Add eSCL scanners not already found by the platform backend
161
+ # A platform scanner and an eSCL scanner are the same physical
162
+ # device when their UUIDs match or their IPs match. The eSCL IP
163
+ # comes from mDNS (keyed by scanner id); the platform IP, when it
164
+ # has one, is parsed out of its name (e.g. a SANE ``escl:`` URI).
165
+ # On macOS the platform scanner exposes only a UUID, on Windows a
166
+ # WSD scanner exposes a UUID, on Linux a network SANE device
167
+ # exposes an IP so matching on either key covers them all. The
168
+ # identity sets never contain None, so a missing UUID/IP simply
169
+ # never matches (no explicit guard needed).
134
170
  escl_ips = self._escl.get_scanner_ips()
135
- for s in escl_scanners:
136
- ip = escl_ips.get(s.id)
137
- if ip and ip in platform_ips:
138
- continue # already discovered by platform backend
139
- platform_scanners.append(s)
140
171
 
141
- return platform_scanners
172
+ if self._prefer_escl:
173
+ # Keep every eSCL entry; drop the platform duplicates.
174
+ escl_uuids = {s.uuid for s in escl_scanners if s.uuid}
175
+ escl_ip_set = set(escl_ips.values())
176
+ kept = [
177
+ s
178
+ for s in platform_scanners
179
+ if s.uuid not in escl_uuids
180
+ and extract_ip_from_uri(s.name) not in escl_ip_set
181
+ ]
182
+ return kept + escl_scanners
183
+
184
+ # Keep every platform entry; drop the eSCL duplicates.
185
+ platform_uuids = {s.uuid for s in platform_scanners if s.uuid}
186
+ platform_ip_set = {
187
+ ip for s in platform_scanners if (ip := extract_ip_from_uri(s.name))
188
+ }
189
+ kept = [
190
+ s
191
+ for s in escl_scanners
192
+ if s.uuid not in platform_uuids
193
+ and escl_ips.get(s.id) not in platform_ip_set
194
+ ]
195
+ return platform_scanners + kept
142
196
 
143
197
  # Delegate remaining methods to the scanner's own _backend_impl
144
198
  def open_scanner(self, scanner: Scanner) -> None:
@@ -169,23 +223,30 @@ def _get_backend() -> ScanBackend:
169
223
  if _backend is not None:
170
224
  return _backend
171
225
 
226
+ # Setting SCANLIB_ESCL=1 signals a preference for the eSCL driver over
227
+ # the platform driver when both discover the same device.
228
+ prefer_escl = os.environ.get("SCANLIB_ESCL", "").strip() == "1"
229
+
172
230
  if sys.platform == "linux":
173
231
  from .backends._sane import SaneBackend
174
232
 
175
- _backend = _CompositeBackend(SaneBackend())
233
+ _backend = _CompositeBackend(SaneBackend(), prefer_escl=prefer_escl)
176
234
 
177
235
  elif sys.platform == "darwin":
178
236
  from .backends._macos import MacOSBackend
179
237
 
180
- if os.environ.get("SCANLIB_ESCL", "").strip() == "1":
181
- _backend = _CompositeBackend(MacOSBackend())
238
+ # On macOS the eSCL backend is opt-in (ImageCaptureCore already
239
+ # handles eSCL natively), so the composite is only used when
240
+ # SCANLIB_ESCL=1 — and enabling it implies preferring eSCL.
241
+ if prefer_escl:
242
+ _backend = _CompositeBackend(MacOSBackend(), prefer_escl=True)
182
243
  else:
183
244
  _backend = MacOSBackend()
184
245
 
185
246
  elif sys.platform == "win32":
186
247
  from .backends._wia import WiaBackend
187
248
 
188
- _backend = _CompositeBackend(WiaBackend())
249
+ _backend = _CompositeBackend(WiaBackend(), prefer_escl=prefer_escl)
189
250
 
190
251
  else:
191
252
  raise BackendNotAvailableError(f"Unsupported platform: {sys.platform}")
@@ -33,6 +33,8 @@ _SERVICE_TYPES = (
33
33
  "_uscan._tcp.local.",
34
34
  "_uscans._tcp.local.",
35
35
  )
36
+ # Normalized (no trailing dot, lower-case) set for matching PTR owner names.
37
+ _SERVICE_TYPE_SET = frozenset(s.rstrip(".").lower() for s in _SERVICE_TYPES)
36
38
 
37
39
 
38
40
  @dataclasses.dataclass
@@ -189,8 +191,15 @@ def _parse_responses(
189
191
  offset += rdlength
190
192
 
191
193
  if rtype == _TYPE_PTR:
192
- target, _ = _read_name(data, rdstart)
193
- ptrs.append((name, target))
194
+ # Only accept PTRs for the scanner service types we queried.
195
+ # We bind the mDNS port and join the multicast group, so the
196
+ # socket also receives unrelated service announcements from
197
+ # other devices on the LAN (AirPlay, screen sharing on
198
+ # _rfb._tcp, _companion-link, …); without this filter those
199
+ # iPhones/Macs/VMs get misreported as scanners.
200
+ if name.rstrip(".").lower() in _SERVICE_TYPE_SET:
201
+ target, _ = _read_name(data, rdstart)
202
+ ptrs.append((name, target))
194
203
  elif rtype == _TYPE_TXT:
195
204
  txts[name] = _parse_txt(data, rdstart, rdlength)
196
205
  elif rtype == _TYPE_A and rdlength == 4:
@@ -251,6 +260,57 @@ def _local_ipv4_addresses() -> list[str]:
251
260
  return unique
252
261
 
253
262
 
263
+ def _resolve_srv_addresses(result: _BrowseResult) -> None:
264
+ """Copy SRV-target addresses onto their service instance names.
265
+
266
+ mDNS often carries the A/AAAA records under the target hostname
267
+ rather than the service instance name, and the two may arrive in
268
+ separate packets. This links them on the accumulated result so a
269
+ service instance resolves to an IP even across packets.
270
+ """
271
+ for sname, srv in result.srvs.items():
272
+ if not result.addrs.get(sname) and result.addrs.get(srv.target):
273
+ result.addrs[sname] = list(result.addrs[srv.target])
274
+
275
+
276
+ def _is_routable_address(addr: str) -> bool:
277
+ """False for addresses that cannot be used as a connection target.
278
+
279
+ Skips IPv6 link-local (``fe80::/10``), which require a ``%zone`` scope
280
+ id we do not track and so are not connectable, and IPv4 link-local
281
+ (``169.254.0.0/16``). Some scanners advertise an extra service that
282
+ only resolves to a link-local address; surfacing it produces an
283
+ unusable, duplicate scanner entry.
284
+ """
285
+ a = addr.lower()
286
+ if a.startswith("169.254."):
287
+ return False
288
+ # IPv6 link-local fe80::/10 spans the fe80–febf prefixes.
289
+ if ":" in a and a[:3] in ("fe8", "fe9", "fea", "feb"):
290
+ return False
291
+ return True
292
+
293
+
294
def _best_address(addrs: list[str]) -> str | None:
    """Choose the most suitable connectable address from *addrs*.

    Link-local addresses are ignored. Among the remaining ones the
    first IPv4 address wins (simpler and unambiguous in an
    ``escl:IP:PORT`` id); failing that, the first remaining address is
    used. Returns ``None`` when nothing usable is left.
    """
    first_usable = None
    for candidate in addrs:
        if not _is_routable_address(candidate):
            continue
        # No colon means IPv4: the preferred family, so stop right here.
        if ":" not in candidate:
            return candidate
        # Remember the earliest usable non-IPv4 address as the fallback.
        if first_usable is None:
            first_usable = candidate
    return first_usable
305
+
306
+
307
def _resolvable_instances(result: _BrowseResult) -> list[str]:
    """List the PTR instance names that resolve to a usable (routable) IP.

    Instances appear in PTR order, once per PTR record that names them.
    """
    usable = []
    for _service, instance in result.ptrs:
        # An instance counts only if at least one of its addresses is connectable.
        if _best_address(result.addrs.get(instance, [])):
            usable.append(instance)
    return usable
312
+
313
+
254
314
  def _browse_mdns(timeout: float = 4.0) -> _BrowseResult:
255
315
  """Send mDNS queries and collect responses.
256
316
 
@@ -305,34 +365,59 @@ def _browse_mdns(timeout: float = 4.0) -> _BrowseResult:
305
365
 
306
366
  sock.setblocking(False)
307
367
 
308
- # Send PTR query. On Windows, send on each interface so
309
- # scanners on any subnet see it.
310
368
  query = _build_query(*_SERVICE_TYPES)
311
- if local_addrs:
312
- for addr in local_addrs:
369
+
370
+ def _send_query() -> None:
371
+ # On Windows, send on each interface so scanners on any
372
+ # subnet see the query.
373
+ if local_addrs:
374
+ for addr in local_addrs:
375
+ try:
376
+ sock.setsockopt(
377
+ socket.IPPROTO_IP,
378
+ socket.IP_MULTICAST_IF,
379
+ socket.inet_aton(addr),
380
+ )
381
+ sock.sendto(query, (_MDNS_ADDR, _MDNS_PORT))
382
+ except OSError:
383
+ pass
384
+ else:
313
385
  try:
314
- sock.setsockopt(
315
- socket.IPPROTO_IP,
316
- socket.IP_MULTICAST_IF,
317
- socket.inet_aton(addr),
318
- )
319
386
  sock.sendto(query, (_MDNS_ADDR, _MDNS_PORT))
320
387
  except OSError:
321
388
  pass
322
- else:
323
- sock.sendto(query, (_MDNS_ADDR, _MDNS_PORT))
324
389
 
325
- # Collect responses with early exit: once we've received at
326
- # least one response, stop after a short quiet period (no new
327
- # packets) rather than waiting the full timeout.
390
+ # Retransmit the query with growing intervals until a response
391
+ # arrives or the deadline passes: multicast is unreliable
392
+ # (especially over Wi-Fi) and a single lost query/response — or a
393
+ # responder transiently suppressing a duplicate question — would
394
+ # otherwise yield nothing for the full timeout. Standard mDNS
395
+ # one-shot queriers retransmit (RFC 6762 §5.2). Continuing to
396
+ # probe across the whole window also gives a sleeping scanner time
397
+ # to wake and answer. Once a response arrives, stop retransmitting
398
+ # and linger for a short quiet period to gather additional records.
328
399
  quiet_period = 0.5
329
400
  got_response = False
330
- deadline = time.monotonic() + timeout
401
+ start = time.monotonic()
402
+ deadline = start + timeout
403
+ send_interval = 0.25
404
+ next_send_at = 0.0 # seconds since start
331
405
  while True:
332
- remaining = deadline - time.monotonic()
406
+ now = time.monotonic()
407
+ remaining = deadline - now
333
408
  if remaining <= 0:
334
409
  break
335
- wait = min(remaining, quiet_period) if got_response else remaining
410
+ elapsed = now - start
411
+ # Fire the next (re)transmission when due, then schedule the
412
+ # following one a little further out (capped at 1s).
413
+ if not got_response and elapsed >= next_send_at:
414
+ _send_query()
415
+ next_send_at = elapsed + send_interval
416
+ send_interval = min(send_interval * 2, 1.0)
417
+ if got_response:
418
+ wait = min(remaining, quiet_period)
419
+ else:
420
+ wait = max(0.0, min(remaining, next_send_at - elapsed))
336
421
  readable, _, _ = select.select([sock], [], [], wait)
337
422
  if not readable:
338
423
  if got_response:
@@ -348,7 +433,12 @@ def _browse_mdns(timeout: float = 4.0) -> _BrowseResult:
348
433
  for k, v in addrs.items():
349
434
  result.addrs.setdefault(k, []).extend(v)
350
435
  result.srvs.update(srvs)
351
- if ptrs:
436
+ # Only treat the browse as answered once we have a service
437
+ # instance that actually resolves to an IP — a bare PTR with
438
+ # the address records still in flight is not enough, and
439
+ # exiting on it would drop the scanner.
440
+ _resolve_srv_addresses(result)
441
+ if _resolvable_instances(result):
352
442
  got_response = True
353
443
 
354
444
  except OSError:
@@ -356,6 +446,7 @@ def _browse_mdns(timeout: float = 4.0) -> _BrowseResult:
356
446
  finally:
357
447
  sock.close()
358
448
 
449
+ _resolve_srv_addresses(result)
359
450
  return result
360
451
 
361
452
 
@@ -408,8 +499,12 @@ def discover_escl_services(timeout: float = 4.0) -> list[EsclServiceInfo]:
408
499
  for svc_type, instance_name in browse.ptrs:
409
500
  txt = browse.txts.get(instance_name, {})
410
501
  srv = browse.srvs.get(instance_name)
411
- ips = browse.addrs.get(instance_name, [])
412
- if not ips:
502
+ # Pick a usable address: prefer IPv4 and skip link-local addresses.
503
+ # A service that resolves only to a link-local address is not
504
+ # connectable (no %zone scope) and would otherwise show up as a
505
+ # broken, duplicate scanner entry.
506
+ ip = _best_address(browse.addrs.get(instance_name, []))
507
+ if ip is None:
413
508
  continue
414
509
 
415
510
  tls = "_uscans._tcp" in svc_type
@@ -425,7 +520,6 @@ def discover_escl_services(timeout: float = 4.0) -> list[EsclServiceInfo]:
425
520
  continue
426
521
  seen_uuids.add(uuid)
427
522
 
428
- ip = ips[0]
429
523
  if not uuid and ip in seen_ips:
430
524
  continue
431
525
  seen_ips.add(ip)
@@ -35,6 +35,36 @@ class FeederEmptyError(ScanError):
35
35
  """The document feeder has no pages to scan."""
36
36
 
37
37
 
38
class ScannerBusyError(ScanError):
    """Raised when another session or application is holding the scanner."""

    # Message used when the raiser does not supply one.
    _DEFAULT = "Scanner is in use by another session or application. Close it and try again."

    def __init__(self, message: str = _DEFAULT) -> None:
        super().__init__(message)
48
+
49
+
50
class ScannerUnavailableError(ScanError):
    """Raised when the scanner cannot be reached (offline, asleep, or disconnected).

    This differs from :class:`ScannerBusyError`: no other session holds
    the device, it just did not respond. The condition is often
    transient (for example a network scanner that has gone to sleep),
    so retrying once the device is reachable again is worthwhile.
    """

    # Message used when the raiser does not supply one.
    _DEFAULT = (
        "Scanner is unavailable — it may be offline, asleep, or disconnected."
        " Check that it is powered on and reachable, then try again."
    )

    def __init__(self, message: str = _DEFAULT) -> None:
        super().__init__(message)
66
+
67
+
38
68
  class ScannerNotOpenError(ScanLibError):
39
69
  """Operation requires an open scanner session."""
40
70
 
@@ -267,6 +297,8 @@ class Scanner:
267
297
  *,
268
298
  scanner_id: str | None = None,
269
299
  location: str | None = None,
300
+ uuid: str | None = None,
301
+ display_name: str | None = None,
270
302
  _backend_impl: ScanBackend | None = None,
271
303
  ) -> None:
272
304
  self._name = name
@@ -275,6 +307,11 @@ class Scanner:
275
307
  self._backend = backend
276
308
  self._id = scanner_id if scanner_id is not None else name
277
309
  self._location = location
310
+ self._uuid = uuid.lower() if uuid else None
311
+ # OS-matching display label computed by the discovering backend
312
+ # (e.g. "vendor model" on SANE, the native device name elsewhere).
313
+ # str(scanner) uses it; None falls back to the generic logic below.
314
+ self._display_name = display_name
278
315
  self._backend_impl = _backend_impl
279
316
  self._sources: list[SourceInfo] = []
280
317
  self._defaults: ScannerDefaults | None = None
@@ -303,13 +340,34 @@ class Scanner:
303
340
  """
304
341
  return self._id
305
342
 
343
+ @property
344
+ def uuid(self) -> str | None:
345
+ """Lower-cased device UUID, or ``None`` when not available.
346
+
347
+ Populated on macOS (where it equals :attr:`id`), on WIA for
348
+ network/WSD scanners (extracted from the WSD port name), and on
349
+ eSCL (from the mDNS ``UUID`` record). ``None`` on SANE. Used by
350
+ the composite backend to recognise that a platform scanner and an
351
+ eSCL scanner are the same physical device.
352
+ """
353
+ return self._uuid
354
+
306
355
  def __str__(self) -> str:
307
- """Human-readable scanner label suitable for UI display."""
356
+ """Human-readable scanner label suitable for UI display.
357
+
358
+ Returns the device's ``location`` when one is set; otherwise the
359
+ backend-computed ``display_name``, which matches what the OS shows
360
+ as the scanner's name (``vendor model`` on SANE, the native device
361
+ name on macOS/WIA/eSCL). Falls back to a generic best-effort label
362
+ when no ``display_name`` was supplied (e.g. a scanner opened by id).
363
+ """
308
364
  if self._location:
309
365
  return self._location
366
+ if self._display_name:
367
+ return self._display_name
310
368
  if self._vendor and self._model:
311
369
  return f"{self._vendor} {self._model}"
312
- return self._vendor or self._model or self._name
370
+ return self._name or self._vendor or self._model
313
371
 
314
372
  @property
315
373
  def vendor(self) -> str | None:
@@ -511,7 +569,11 @@ class Scanner:
511
569
  for page in self._backend_impl.scan_pages(self, options):
512
570
  if self._abort_event.is_set():
513
571
  raise ScanAborted("Scan aborted")
514
- yield page
572
+ # Normalise to the requested color mode: scanners may
573
+ # return a richer mode than asked for (e.g. RGB when
574
+ # grayscale was requested), so down-convert here once for
575
+ # every backend rather than relying on each one.
576
+ yield _coerce_color_mode(page, options.color_mode, options.bw_threshold)
515
577
  page_count += 1
516
578
  # Feeder: backend loops internally until empty; one call is enough.
517
579
  # Flatbed: backend scans one round; ask next_page to continue.
@@ -678,6 +740,39 @@ def wait_or_cancel(
678
740
  return True
679
741
 
680
742
 
743
+ # Color-mode "richness" ordering: COLOR carries the most information,
744
+ # BW the least. We can down-convert (drop information) but never
745
+ # up-convert (fabricate channels we were not given).
746
+ _COLOR_MODE_RANK = {ColorMode.BW: 0, ColorMode.GRAY: 1, ColorMode.COLOR: 2}
747
+
748
+
749
def _coerce_color_mode(
    page: ScannedPage, target: ColorMode, bw_threshold: int = 128
) -> ScannedPage:
    """Reduce *page* to the *target* color mode when it is richer than asked.

    Scanners do not always honour the requested color mode (some eSCL
    devices return RGB even when grayscale is requested, for example).
    This brings such a page down to what the caller asked for:
    COLOR→GRAY via luminance, and GRAY/COLOR→BW via *bw_threshold*.
    A page already at, or below, the target richness is returned as-is;
    pages are never up-converted.
    """
    current = page.color_mode
    # Nothing to do unless the page carries more information than requested.
    if _COLOR_MODE_RANK[current] <= _COLOR_MODE_RANK[target]:
        return page

    # Imported lazily: the accelerator is only needed when converting.
    from _scanlib_accel import gray_to_bw, rgb_to_gray

    pixels = page.data
    # Step 1: color always passes through grayscale first.
    if current == ColorMode.COLOR:
        pixels = rgb_to_gray(pixels, page.width, page.height)
        current = ColorMode.GRAY
    # Step 2: threshold down to black-and-white only when that was requested.
    if current == ColorMode.GRAY and target == ColorMode.BW:
        pixels = gray_to_bw(pixels, page.width, page.height, bw_threshold)
        current = ColorMode.BW

    return ScannedPage(
        data=pixels,
        width=page.width,
        height=page.height,
        color_mode=current,
    )
774
+
775
+
681
776
  def build_pdf(
682
777
  pages: Iterable[ScannedPage],
683
778
  *,