async-batch-llm 0.7.0__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/CHANGELOG.md +38 -0
  2. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/PKG-INFO +1 -1
  3. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/pyproject.toml +1 -1
  4. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/models.py +65 -18
  5. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/uv.lock +17 -4
  6. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/.gitignore +0 -0
  7. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/LICENSE +0 -0
  8. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/README.md +0 -0
  9. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/API.md +0 -0
  10. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/GEMINI_INTEGRATION.md +0 -0
  11. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/MIGRATION_V0_4.md +0 -0
  12. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/api/core.md +0 -0
  13. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/api/observers.md +0 -0
  14. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/api/strategies.md +0 -0
  15. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/BATCH_LLM_FEEDBACK.md +0 -0
  16. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/BATCH_LLM_FEEDBACK_OLD.md +0 -0
  17. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/IMPLEMENTATION_PLAN_V0_2.md +0 -0
  18. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/IMPLEMENTATION_PLAN_V0_3.md +0 -0
  19. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/IMPROVEMENT_PLAN.md +0 -0
  20. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/MIGRATION.md +0 -0
  21. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/MIGRATION_V0_1.md +0 -0
  22. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/MIGRATION_V0_2.md +0 -0
  23. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/MIGRATION_V0_3.md +0 -0
  24. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/archive/README.md +0 -0
  25. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/contributing.md +0 -0
  26. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/examples/advanced.md +0 -0
  27. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/examples/basic.md +0 -0
  28. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/examples/custom-strategies.md +0 -0
  29. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/getting-started.md +0 -0
  30. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/index.md +0 -0
  31. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/migration/v0.1.md +0 -0
  32. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/docs/migration/v0.4.md +0 -0
  33. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/README.md +0 -0
  34. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/__init__.py +0 -0
  35. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example.py +0 -0
  36. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example_anthropic.py +0 -0
  37. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example_context_manager.py +0 -0
  38. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example_gemini_direct.py +0 -0
  39. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example_gemini_smart_retry.py +0 -0
  40. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example_langchain.py +0 -0
  41. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example_llm_strategies.py +0 -0
  42. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example_model_escalation.py +0 -0
  43. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example_openai.py +0 -0
  44. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/examples/example_smart_model_escalation.py +0 -0
  45. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/__init__.py +0 -0
  46. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/_internal/__init__.py +0 -0
  47. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/_internal/error_logging.py +0 -0
  48. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/_internal/event_dispatcher.py +0 -0
  49. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/_internal/rate_limit_coordinator.py +0 -0
  50. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/_internal/strategy_lifecycle.py +0 -0
  51. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/base.py +0 -0
  52. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/classifiers/__init__.py +0 -0
  53. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/classifiers/gemini.py +0 -0
  54. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/core/__init__.py +0 -0
  55. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/core/config.py +0 -0
  56. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/core/protocols.py +0 -0
  57. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/llm_strategies.py +0 -0
  58. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/middleware/__init__.py +0 -0
  59. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/middleware/base.py +0 -0
  60. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/observers/__init__.py +0 -0
  61. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/observers/base.py +0 -0
  62. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/observers/metrics.py +0 -0
  63. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/parallel.py +0 -0
  64. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/strategies/__init__.py +0 -0
  65. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/strategies/errors.py +0 -0
  66. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/strategies/rate_limit.py +0 -0
  67. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/testing/__init__.py +0 -0
  68. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/testing/mocks.py +0 -0
  69. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/testing/strategies.py +0 -0
  70. {async_batch_llm-0.7.0 → async_batch_llm-0.7.2}/src/async_batch_llm/token_extractor.py +0 -0
@@ -7,6 +7,44 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.7.2] - 2026-04-22
11
+
12
+ ### Fixed
13
+
14
+ - `GeminiCachedModel.generate()` no longer emits misleading "Cache expired" log
15
+ lines with a ~56-year age under concurrent workers. The log is now inside the
16
+ cache lock and after the double-check, so only the worker that actually renews
17
+ logs the message; losing-race workers stay silent. Age is rendered as
18
+ "unknown (cache not yet initialized)" when `_cache_created_at` is `None`
19
+ instead of being computed as `time.time() - 0`.
20
+
21
+ ### Security
22
+
23
+ - Bumped transitive dependency `authlib` 1.6.10 → 1.7.0 to clear
24
+ [GHSA-jj8c-mmj3-mmgv](https://github.com/authlib/authlib/security/advisories/GHSA-jj8c-mmj3-mmgv)
25
+ (CSRF in Authlib's OAuth cache path, medium/CVSS 5.4). async-batch-llm does not
26
+ use Authlib directly — it reaches us via `pydantic-ai[fastmcp]` → `fastmcp` —
27
+ and the vulnerable code path is not exercised, but the bump clears the
28
+ Dependabot alert.
29
+
30
+ ## [0.7.1] - 2026-04-22
31
+
32
+ ### Fixed
33
+
34
+ - `GeminiCachedModel.prepare()` no longer crashes with `CreateCachedContentConfig`'s
35
+ `extra_forbidden` ValidationError when `cache_tags` is non-empty. google-genai's
36
+ `CreateCachedContentConfig` has no `metadata` field in the 1.x line; tags are now
37
+ encoded into the cache's `display_name` with a sentinel prefix (`abl-tags:<json>`)
38
+ and decoded on lookup. Previously any `GeminiCachedModel` with a non-empty
39
+ `cache_tags=` dict failed every worker's prepare() on current google-genai versions.
40
+
41
+ ### Changed
42
+
43
+ - `cache_tags` are persisted in `CachedContent.display_name` instead of `metadata`.
44
+ Tag values should stay short — Gemini's `display_name` has a 128-character limit.
45
+ Caches created outside async-batch-llm (no `abl-tags:` prefix on display_name) are
46
+ treated as untagged and won't match a `GeminiCachedModel` with `cache_tags` set.
47
+
10
48
  ## [0.7.0] - 2026-04-16
11
49
 
12
50
  Internal refactor release. Public API (`async_batch_llm/__init__.py`) is unchanged —
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: async-batch-llm
3
- Version: 0.7.0
3
+ Version: 0.7.2
4
4
  Summary: Provider-agnostic framework for high-throughput LLM processing with async workers, automatic retries, rate limiting, and intelligent validation recovery.
5
5
  Project-URL: Homepage, https://github.com/geoff-davis/async-batch-llm
6
6
  Project-URL: Documentation, https://geoff-davis.github.io/async-batch-llm/
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "async-batch-llm"
7
- version = "0.7.0"
7
+ version = "0.7.2"
8
8
  description = "Provider-agnostic framework for high-throughput LLM processing with async workers, automatic retries, rate limiting, and intelligent validation recovery."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -7,12 +7,19 @@ without knowing about provider-specific details.
7
7
  Added in v0.6.0.
8
8
  """
9
9
 
10
+ import json
10
11
  import logging
11
12
  import time
12
13
  from typing import TYPE_CHECKING, Any
13
14
 
14
15
  from .base import LLMResponse
15
16
 
17
+ # Sentinel prefix for encoding cache_tags into Gemini's CachedContent.display_name.
18
+ # google-genai's CreateCachedContentConfig does not expose a metadata field, so we
19
+ # round-trip tags through display_name, marked with this prefix so we can tell
20
+ # async-batch-llm-tagged caches apart from caches with user-chosen display names.
21
+ _TAG_DISPLAY_NAME_PREFIX = "abl-tags:"
22
+
16
23
  # Conditional imports for optional dependencies
17
24
  if TYPE_CHECKING:
18
25
  from google import genai
@@ -28,6 +35,35 @@ else:
28
35
  logger = logging.getLogger(__name__)
29
36
 
30
37
 
38
+ def _encode_tags_to_display_name(tags: dict[str, str]) -> str:
39
+ """Encode cache_tags as a deterministic string for the CachedContent display_name.
40
+
41
+ Uses sorted, compact JSON so equal tag sets always produce the same display_name —
42
+ critical for cache lookup to match. Prefixed with a sentinel so we can tell our
43
+ tag encoding apart from a user-assigned display name.
44
+ """
45
+ encoded = json.dumps(tags, sort_keys=True, separators=(",", ":"))
46
+ return f"{_TAG_DISPLAY_NAME_PREFIX}{encoded}"
47
+
48
+
49
+ def _decode_tags_from_display_name(display_name: str | None) -> dict[str, str] | None:
50
+ """Decode cache_tags from a CachedContent display_name.
51
+
52
+ Returns None when the display_name is absent or was not produced by
53
+ _encode_tags_to_display_name. Callers should treat None as "this cache has no
54
+ tag metadata we can match against".
55
+ """
56
+ if not display_name or not display_name.startswith(_TAG_DISPLAY_NAME_PREFIX):
57
+ return None
58
+ try:
59
+ decoded = json.loads(display_name[len(_TAG_DISPLAY_NAME_PREFIX) :])
60
+ except ValueError:
61
+ return None
62
+ if not isinstance(decoded, dict):
63
+ return None
64
+ return decoded
65
+
66
+
31
67
  def _extract_metadata(response: Any) -> dict[str, Any] | None:
32
68
  """Extract safety ratings and finish reason from a Gemini response."""
33
69
  metadata: dict[str, Any] = {}
@@ -40,9 +76,7 @@ def _extract_metadata(response: Any) -> dict[str, Any] | None:
40
76
  if hasattr(candidate, "safety_ratings") and candidate.safety_ratings:
41
77
  ratings: dict[str, str] = {}
42
78
  for rating in candidate.safety_ratings:
43
- category = (
44
- str(rating.category) if hasattr(rating, "category") else "UNKNOWN"
45
- )
79
+ category = str(rating.category) if hasattr(rating, "category") else "UNKNOWN"
46
80
  probability = (
47
81
  str(rating.probability) if hasattr(rating, "probability") else "UNKNOWN"
48
82
  )
@@ -242,7 +276,10 @@ class GeminiCachedModel:
242
276
  cache_renewal_buffer_seconds: Renew this many seconds before expiry
243
277
  (default: 300 = 5 minutes).
244
278
  auto_renew: Auto-renew expired caches in generate() (default: True).
245
- cache_tags: Tags for precise cache matching.
279
+ cache_tags: Tags for precise cache matching. Encoded into the cache's
280
+ ``display_name`` at creation (google-genai ``CreateCachedContentConfig``
281
+ has no ``metadata`` field) and decoded on lookup. Keep tag values
282
+ short — Gemini's ``display_name`` has a 128-character limit.
246
283
  safety_settings: Default safety settings for all calls.
247
284
  """
248
285
  if genai is None:
@@ -293,7 +330,6 @@ class GeminiCachedModel:
293
330
  self._cache_lock: Any = None
294
331
  self._prepared = False
295
332
 
296
-
297
333
  @property
298
334
  def cache_name(self) -> str | None:
299
335
  """The name of the active cache, or None."""
@@ -396,12 +432,6 @@ class GeminiCachedModel:
396
432
 
397
433
  # Auto-renew if expired
398
434
  if self._auto_renew and self._is_cache_expired():
399
- logger.info(
400
- "Cache expired or about to expire, renewing before API call "
401
- f"(age: {time.time() - (self._cache_created_at or 0):.0f}s, "
402
- f"renewal buffer: {self._cache_renewal_buffer_seconds}s)"
403
- )
404
-
405
435
  import asyncio
406
436
 
407
437
  if self._cache_lock is None:
@@ -409,6 +439,16 @@ class GeminiCachedModel:
409
439
 
410
440
  async with self._cache_lock:
411
441
  if self._is_cache_expired():
442
+ age_str = (
443
+ f"{time.time() - self._cache_created_at:.0f}s"
444
+ if self._cache_created_at is not None
445
+ else "unknown (cache not yet initialized)"
446
+ )
447
+ logger.info(
448
+ "Cache expired or about to expire, renewing before API call "
449
+ f"(age: {age_str}, "
450
+ f"renewal buffer: {self._cache_renewal_buffer_seconds}s)"
451
+ )
412
452
  self._cache = None
413
453
  self._cache_created_at = None
414
454
  self._prepared = False
@@ -476,14 +516,20 @@ class GeminiCachedModel:
476
516
  continue
477
517
 
478
518
  if self._cache_tags:
479
- cache_metadata = getattr(cache, "metadata", {}) or {}
480
- tags_match = all(
481
- cache_metadata.get(k) == v for k, v in self._cache_tags.items()
519
+ cache_tags = _decode_tags_from_display_name(
520
+ getattr(cache, "display_name", None)
482
521
  )
522
+ if cache_tags is None:
523
+ logger.debug(
524
+ f"Skipping cache {cache.name}: no abl-tags display_name "
525
+ f"(want {self._cache_tags})"
526
+ )
527
+ continue
528
+ tags_match = all(cache_tags.get(k) == v for k, v in self._cache_tags.items())
483
529
  if not tags_match:
484
530
  logger.debug(
485
531
  f"Skipping cache {cache.name}: tags don't match "
486
- f"(want {self._cache_tags}, has {cache_metadata})"
532
+ f"(want {self._cache_tags}, has {cache_tags})"
487
533
  )
488
534
  continue
489
535
 
@@ -496,8 +542,7 @@ class GeminiCachedModel:
496
542
  tag_info = f" with tags {self._cache_tags}" if self._cache_tags else ""
497
543
  age = time.time() - self._cache_created_at
498
544
  logger.info(
499
- f"Reusing existing Gemini cache: {self._cache.name}{tag_info} "
500
- f"(age: {age:.0f}s)"
545
+ f"Reusing existing Gemini cache: {self._cache.name}{tag_info} (age: {age:.0f}s)"
501
546
  )
502
547
  return
503
548
  except Exception as e:
@@ -513,7 +558,9 @@ class GeminiCachedModel:
513
558
  "ttl": f"{self._cache_ttl_seconds}s",
514
559
  }
515
560
  if self._cache_tags:
516
- config_kwargs["metadata"] = self._cache_tags
561
+ # google-genai's CreateCachedContentConfig has no `metadata` field;
562
+ # round-trip tags through `display_name` with a sentinel prefix.
563
+ config_kwargs["display_name"] = _encode_tags_to_display_name(self._cache_tags)
517
564
 
518
565
  self._cache = await self._client.aio.caches.create(
519
566
  model=self._model,
@@ -239,7 +239,7 @@ wheels = [
239
239
 
240
240
  [[package]]
241
241
  name = "async-batch-llm"
242
- version = "0.7.0"
242
+ version = "0.7.2"
243
243
  source = { editable = "." }
244
244
  dependencies = [
245
245
  { name = "aiolimiter" },
@@ -319,14 +319,15 @@ wheels = [
319
319
 
320
320
  [[package]]
321
321
  name = "authlib"
322
- version = "1.6.10"
322
+ version = "1.7.0"
323
323
  source = { registry = "https://pypi.org/simple" }
324
324
  dependencies = [
325
325
  { name = "cryptography" },
326
+ { name = "joserfc" },
326
327
  ]
327
- sdist = { url = "https://files.pythonhosted.org/packages/aa/e2/2cd626412bfc3c78b17ca5e5ea8d489f8cae31d40b061f4da0a89068d8a3/authlib-1.6.10.tar.gz", hash = "sha256:856a4f54d6ef3361ca6bb6d14a27e8b88f8097cca795fb428ffe13720e2ecde6", size = 165333, upload-time = "2026-04-13T13:30:34.718Z" }
328
+ sdist = { url = "https://files.pythonhosted.org/packages/d9/82/4d0603f30c1b4629b1f091bb266b0d7986434891d6940a8c87f8098db24e/authlib-1.7.0.tar.gz", hash = "sha256:b3e326c9aa9cc3ea95fe7d89fd880722d3608da4d00e8a27e061e64b48d801d5", size = 175890, upload-time = "2026-04-18T11:00:28.559Z" }
328
329
  wheels = [
329
- { url = "https://files.pythonhosted.org/packages/7d/f6/9093f1ed17b6e2f4ac50d214543d4ec5268902a70e2158a752a06423b5ef/authlib-1.6.10-py2.py3-none-any.whl", hash = "sha256:aa639b43292554539924a3b4aaa9e81cd67ab64d3e28b22428c61f1200240287", size = 244351, upload-time = "2026-04-13T13:30:33.34Z" },
330
+ { url = "https://files.pythonhosted.org/packages/ca/48/c954218b2a250e23f178f10167c4173fecb5a75d2c206f0a67ba58006c26/authlib-1.7.0-py2.py3-none-any.whl", hash = "sha256:e36817afb02f6f0b6bf55f150782499ddd6ddf44b402bb055d3263cc65ac9ae0", size = 258779, upload-time = "2026-04-18T11:00:26.64Z" },
330
331
  ]
331
332
 
332
333
  [[package]]
@@ -1661,6 +1662,18 @@ wheels = [
1661
1662
  { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" },
1662
1663
  ]
1663
1664
 
1665
+ [[package]]
1666
+ name = "joserfc"
1667
+ version = "1.6.4"
1668
+ source = { registry = "https://pypi.org/simple" }
1669
+ dependencies = [
1670
+ { name = "cryptography" },
1671
+ ]
1672
+ sdist = { url = "https://files.pythonhosted.org/packages/de/c6/de8fdbdfa75c8ca04fead38a82d573df8a82906e984c349d58665f459558/joserfc-1.6.4.tar.gz", hash = "sha256:34ce5f499bfcc5e9ad4cc75077f9278ab3227b71da9aaf28f9ab705f8a560d3c", size = 231866, upload-time = "2026-04-13T13:15:40.632Z" }
1673
+ wheels = [
1674
+ { url = "https://files.pythonhosted.org/packages/b6/f7/210b27752e972edb36d239315b08d3eb6b14824cc4a590da2337d195260b/joserfc-1.6.4-py3-none-any.whl", hash = "sha256:3e4a22b509b41908989237a045e25c8308d5fd47ab96bdae2dd8057c6451003a", size = 70464, upload-time = "2026-04-13T13:15:39.259Z" },
1675
+ ]
1676
+
1664
1677
  [[package]]
1665
1678
  name = "jsonpath-python"
1666
1679
  version = "1.1.5"
File without changes