apcore-toolkit 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/CHANGELOG.md +40 -0
  2. apcore_toolkit-0.5.0/LICENSE +17 -0
  3. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/PKG-INFO +10 -5
  4. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/README.md +3 -1
  5. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/pyproject.toml +5 -4
  6. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/__init__.py +29 -2
  7. apcore_toolkit-0.5.0/src/apcore_toolkit/_type_mapping.py +19 -0
  8. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/ai_enhancer.py +85 -40
  9. apcore_toolkit-0.5.0/src/apcore_toolkit/binding_loader.py +347 -0
  10. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/convention_scanner.py +69 -33
  11. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/display/resolver.py +38 -5
  12. apcore_toolkit-0.5.0/src/apcore_toolkit/http_verb_map.py +165 -0
  13. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/openapi.py +31 -6
  14. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/output/__init__.py +6 -0
  15. apcore_toolkit-0.5.0/src/apcore_toolkit/output/http_proxy_writer.py +247 -0
  16. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/output/python_writer.py +9 -9
  17. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/output/registry_writer.py +36 -4
  18. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/output/verifiers.py +9 -0
  19. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/output/yaml_writer.py +80 -22
  20. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/pydantic_utils.py +62 -7
  21. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/scanner.py +92 -8
  22. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/serializers.py +2 -0
  23. apcore_toolkit-0.5.0/src/apcore_toolkit/types.py +83 -0
  24. apcore_toolkit-0.5.0/tests/fixtures/scanner_verb_map.json +100 -0
  25. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_ai_enhancer.py +97 -0
  26. apcore_toolkit-0.5.0/tests/test_binding_loader.py +409 -0
  27. apcore_toolkit-0.5.0/tests/test_convention_scanner.py +319 -0
  28. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_display_resolver.py +203 -0
  29. apcore_toolkit-0.5.0/tests/test_http_proxy_writer.py +421 -0
  30. apcore_toolkit-0.5.0/tests/test_http_verb_map.py +246 -0
  31. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_openapi.py +13 -0
  32. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_output_factory.py +12 -0
  33. apcore_toolkit-0.5.0/tests/test_pydantic_utils.py +261 -0
  34. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_python_writer.py +26 -0
  35. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_registry_writer.py +29 -1
  36. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_scanner.py +133 -0
  37. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_serializers.py +19 -0
  38. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_types.py +72 -2
  39. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_verifiers.py +28 -0
  40. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_write_error.py +14 -9
  41. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_yaml_writer.py +74 -0
  42. apcore_toolkit-0.4.2/src/apcore_toolkit/output/http_proxy_writer.py +0 -187
  43. apcore_toolkit-0.4.2/src/apcore_toolkit/types.py +0 -46
  44. apcore_toolkit-0.4.2/tests/test_convention_scanner.py +0 -118
  45. apcore_toolkit-0.4.2/tests/test_http_proxy_writer.py +0 -204
  46. apcore_toolkit-0.4.2/tests/test_pydantic_utils.py +0 -120
  47. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/.github/CODEOWNERS +0 -0
  48. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/.github/copilot-ignore +0 -0
  49. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/.github/workflows/ci.yml +0 -0
  50. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/.gitignore +0 -0
  51. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/.gitmessage +0 -0
  52. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/.pre-commit-config.yaml +0 -0
  53. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/display/__init__.py +0 -0
  54. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/formatting/__init__.py +0 -0
  55. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/formatting/markdown.py +0 -0
  56. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/output/errors.py +0 -0
  57. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/output/types.py +0 -0
  58. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/src/apcore_toolkit/schema_utils.py +0 -0
  59. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/conftest.py +0 -0
  60. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_markdown.py +0 -0
  61. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_schema_utils.py +0 -0
  62. {apcore_toolkit-0.4.2 → apcore_toolkit-0.5.0}/tests/test_write_result.py +0 -0
@@ -2,6 +2,46 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.5.0] - 2026-04-21
6
+
7
+ ### Added
8
+
9
+ - **`BindingLoader`** (`apcore_toolkit.binding_loader`) — parses `.binding.yaml` files back into `ScannedModule` objects, the inverse of `YAMLWriter`. Unlike `apcore.BindingLoader`, this is pure data: no target import, no Registry mutation. Enables verification, merging, diffing, and round-trip workflows.
10
+ - `load(path, *, strict=False)` — single file or directory of `*.binding.yaml`.
11
+ - `load_data(data, *, strict=False)` — pre-parsed YAML dict.
12
+ - Loose mode (default): only `module_id + target` required; missing fields use defaults.
13
+ - Strict mode: additionally requires `input_schema + output_schema`.
14
+ - `spec_version` validated; missing/unsupported versions WARN but do not fail.
15
+ - `annotations` parsed via `ModuleAnnotations.from_dict`; malformed values degrade to `None` with WARN.
16
+ - `examples` entries validated individually; malformed ones skipped with WARN.
17
+ - `BindingLoadError` exception carries `file_path`, `module_id`, `missing_fields`, `reason`.
18
+ - **`ScannedModule.display`** — new top-level optional field (`dict | None`) holding the sparse display overlay for binding YAML persistence. Distinct from `metadata["display"]` (resolved form produced by `DisplayResolver`).
19
+ - **New feature doc**: `docs/features/binding-loader.md`; `display-overlay.md` and `output-writers.md` updated.
20
+
21
+ ### Changed
22
+
23
+ - **`YAMLWriter._build_binding`** — emits top-level `display:` key only when `ScannedModule.display is not None` (skip when None keeps output clean).
24
+ - **`serializers.module_to_dict`** — includes `display` key in output.
25
+ - **`AIEnhancer._build_prompt`** — confidence template is now built dynamically from `gaps`. When `annotations` is in gaps, the prompt requests per-field confidence for every `_ANNOTATION_FIELD_VALIDATORS` field (`annotations.readonly`, `annotations.streaming`, `annotations.cache_ttl`, ...). Previously the template hard-coded `{"description": 0.0, "documentation": 0.0}` only, causing all annotation-field confidence lookups to fall back to `0.0` and fail the threshold check — annotation enhancement silently never took effect. Fixes symmetry with `_enhance_module`'s `ann_conf.get(f"annotations.{field_name}", ...)` read path.
26
+
27
+ ### Dependencies
28
+
29
+ - **`apcore >= 0.19.0`** — picks up the expanded `ModuleAnnotations` (12 fields incl. `streaming`, `cacheable`, `cache_ttl`, `cache_key_fields`, `paginated`, `pagination_style`, `extra`). No toolkit code changes were needed for the type itself — `_build_annotation_field_validators` reflects the updated dataclass automatically.
30
+
31
+ ### Tests
32
+
33
+ - +34 new tests: 24 for `BindingLoader` (parsing, strict/loose modes, spec_version, file & directory loading, round-trip with `YAMLWriter`), 5 for the prompt confidence block, and 5 hardening tests (display deep-copy, malformed-shape warn, recursive glob, UTF-8 encoding, null-field error wording).
34
+ - Updated `test_field_count` (13 → 14) and `test_all_expected_keys` for the new `display` field.
35
+ - Total suite: 440 tests.
36
+
37
+ ### Hardening (post-review)
38
+
39
+ - **`BindingLoader`**: now warns on (rather than silently dropping) malformed `display` values that are not a mapping; `load()` gained a `recursive: bool = False` kwarg for nested binding layouts; `read_text` now forces UTF-8 decoding so non-ASCII aliases round-trip on non-UTF-8 locales; required-field validation now rejects wrong-type scalars (e.g. `module_id: 42`, `target: true`) and empty strings in addition to absent/null, matching the Rust loader's contract — error wording is "missing or invalid required fields"; nested `input_schema`/`output_schema`/`metadata` are now deep-copied via `copy.deepcopy` so caller mutation does not leak back into the parsed YAML source graph.
40
+ - **`YAMLWriter`**: `display` is now deep-copied into the emitted binding (defensive parity with the TypeScript/Rust writers) so post-write mutation of `ScannedModule.display` cannot leak into the file. File writes are now atomic: the payload is written to `<name>.<pid>.tmp`, `fsync`ed, then `os.replace`d onto the final path (matches the TypeScript `tmp + rename` and Rust `tmp + sync_all + rename` writers). A process crash mid-write no longer leaves a partial YAML file that `BindingLoader` would fail to parse. A pre-write check refuses to overwrite a symlink at the target path (defence-in-depth against TOCTOU).
41
+ - **`BaseScanner.deduplicate_ids`**: pre-scans all input `module_id`s so generated `_N` suffixes never collide with an ID already present in the input. Input `[a, a, a_2]` now yields `[a, a_3, a_2]` instead of the previous buggy `[a, a_2, a_2]`. Matches the TypeScript and Rust implementations.
42
+ - **`resolve_target` / `RegistryWriter.write`**: new `allowed_prefixes: list[str] | None` kwarg (forwarded from `RegistryWriter.write` through `_to_function_module` to `resolve_target`). When set, `resolve_target` rejects any module path outside the listed prefixes **before** calling `importlib.import_module`, raising `PermissionError`. Mitigates arbitrary-code-execution via forged binding files (e.g. a malicious `target: "os:system"` injected into untrusted YAML). Parity with the TypeScript SDK's `allowedPrefixes` option, adapted to Python's module-name import model. Boundary-aware: `"myapp"` permits `myapp.views` but NOT `myappx.foo`. Rust does not need this because `resolve_target` is parse-only and the `HandlerFactory` is the security boundary.
43
+ - **`ScannedModule.display`**: moved to the END of the dataclass so existing positional `ScannedModule(...)` callers are not broken by the new field.
44
+
5
45
  ## [0.4.1] - 2026-03-25
6
46
 
7
47
  ### Added
@@ -0,0 +1,17 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ Copyright 2024 AI Partner Up
6
+
7
+ Licensed under the Apache License, Version 2.0 (the "License");
8
+ you may not use this file except in compliance with the License.
9
+ You may obtain a copy of the License at
10
+
11
+ http://www.apache.org/licenses/LICENSE-2.0
12
+
13
+ Unless required by applicable law or agreed to in writing, software
14
+ distributed under the License is distributed on an "AS IS" BASIS,
15
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ See the License for the specific language governing permissions and
17
+ limitations under the License.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apcore-toolkit
3
- Version: 0.4.2
3
+ Version: 0.5.0
4
4
  Summary: Shared scanner, schema extraction, and output toolkit for apcore framework adapters
5
5
  Project-URL: Homepage, https://aiperceivable.com
6
6
  Project-URL: Repository, https://github.com/aiperceivable/apcore-toolkit-python
@@ -8,9 +8,10 @@ Project-URL: Documentation, https://github.com/aiperceivable/apcore-toolkit-pyth
8
8
  Project-URL: Issues, https://github.com/aiperceivable/apcore-toolkit-python/issues
9
9
  Author-email: aiperceivable <tercel.yi@gmail.com>
10
10
  License-Expression: Apache-2.0
11
+ License-File: LICENSE
11
12
  Keywords: apcore,mcp,openapi,pydantic,scanner,schema,toolkit,yaml
12
13
  Requires-Python: >=3.11
13
- Requires-Dist: apcore>=0.18.0
14
+ Requires-Dist: apcore>=0.19.0
14
15
  Requires-Dist: pydantic>=2.0
15
16
  Requires-Dist: pyyaml>=6.0
16
17
  Provides-Extra: dev
@@ -18,10 +19,12 @@ Requires-Dist: apdev[dev]>=0.2.1; extra == 'dev'
18
19
  Requires-Dist: httpx>=0.24; extra == 'dev'
19
20
  Requires-Dist: mypy>=1.0; extra == 'dev'
20
21
  Requires-Dist: pytest-cov>=4.0; extra == 'dev'
21
- Requires-Dist: pytest>=7.0; extra == 'dev'
22
- Requires-Dist: ruff>=0.1; extra == 'dev'
22
+ Requires-Dist: pytest>=8.0; extra == 'dev'
23
+ Requires-Dist: ruff>=0.9; extra == 'dev'
23
24
  Provides-Extra: http-proxy
24
25
  Requires-Dist: httpx>=0.24; extra == 'http-proxy'
26
+ Provides-Extra: json-schema
27
+ Requires-Dist: jsonschema>=4.0; extra == 'json-schema'
25
28
  Description-Content-Type: text/markdown
26
29
 
27
30
  <div align="center">
@@ -48,6 +51,8 @@ pip install apcore-toolkit
48
51
  | `ScannedModule` | Canonical dataclass representing a scanned endpoint |
49
52
  | `BaseScanner` | Abstract base class for framework scanners with filtering and deduplication |
50
53
  | `YAMLWriter` | Generates `.binding.yaml` files for `apcore.BindingLoader` |
54
+ | `BindingLoader` | Parses `.binding.yaml` files back into `ScannedModule` objects (pure-data inverse of `YAMLWriter`, with loose/strict modes) |
55
+ | `BindingLoadError` | Exception raised when binding parsing fails; carries `file_path`, `module_id`, `missing_fields`, `reason` |
51
56
  | `PythonWriter` | Generates `@module`-decorated Python wrapper files |
52
57
  | `RegistryWriter` | Registers modules directly into an `apcore.Registry` |
53
58
  | `HTTPProxyRegistryWriter` | Registers HTTP proxy modules that forward requests to a running API |
@@ -249,7 +254,7 @@ Input and output schemas are inferred from PEP 484 type annotations. Use `includ
249
254
  ## Requirements
250
255
 
251
256
  - Python >= 3.11
252
- - apcore >= 0.14.0
257
+ - apcore >= 0.19.0
253
258
  - pydantic >= 2.0
254
259
  - PyYAML >= 6.0
255
260
 
@@ -22,6 +22,8 @@ pip install apcore-toolkit
22
22
  | `ScannedModule` | Canonical dataclass representing a scanned endpoint |
23
23
  | `BaseScanner` | Abstract base class for framework scanners with filtering and deduplication |
24
24
  | `YAMLWriter` | Generates `.binding.yaml` files for `apcore.BindingLoader` |
25
+ | `BindingLoader` | Parses `.binding.yaml` files back into `ScannedModule` objects (pure-data inverse of `YAMLWriter`, with loose/strict modes) |
26
+ | `BindingLoadError` | Exception raised when binding parsing fails; carries `file_path`, `module_id`, `missing_fields`, `reason` |
25
27
  | `PythonWriter` | Generates `@module`-decorated Python wrapper files |
26
28
  | `RegistryWriter` | Registers modules directly into an `apcore.Registry` |
27
29
  | `HTTPProxyRegistryWriter` | Registers HTTP proxy modules that forward requests to a running API |
@@ -223,7 +225,7 @@ Input and output schemas are inferred from PEP 484 type annotations. Use `includ
223
225
  ## Requirements
224
226
 
225
227
  - Python >= 3.11
226
- - apcore >= 0.14.0
228
+ - apcore >= 0.19.0
227
229
  - pydantic >= 2.0
228
230
  - PyYAML >= 6.0
229
231
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "apcore-toolkit"
7
- version = "0.4.2"
7
+ version = "0.5.0"
8
8
  description = "Shared scanner, schema extraction, and output toolkit for apcore framework adapters"
9
9
  requires-python = ">=3.11"
10
10
  readme = "README.md"
@@ -23,17 +23,18 @@ keywords = [
23
23
  "toolkit",
24
24
  ]
25
25
  dependencies = [
26
- "apcore>=0.18.0",
26
+ "apcore>=0.19.0",
27
27
  "pydantic>=2.0",
28
28
  "PyYAML>=6.0",
29
29
  ]
30
30
 
31
31
  [project.optional-dependencies]
32
32
  http-proxy = ["httpx>=0.24"]
33
+ json-schema = ["jsonschema>=4.0"]
33
34
  dev = [
34
- "pytest>=7.0",
35
+ "pytest>=8.0",
35
36
  "pytest-cov>=4.0",
36
- "ruff>=0.1",
37
+ "ruff>=0.9",
37
38
  "mypy>=1.0",
38
39
  "apdev[dev]>=0.2.1",
39
40
  "httpx>=0.24",
@@ -6,6 +6,7 @@ Public API re-exports for convenient access to core types and utilities.
6
6
  from importlib.metadata import PackageNotFoundError
7
7
  from importlib.metadata import version as _get_version
8
8
  from apcore_toolkit.ai_enhancer import AIEnhancer, Enhancer
9
+ from apcore_toolkit.binding_loader import BindingLoader, BindingLoadError
9
10
  from apcore_toolkit.display import DisplayResolver
10
11
  from apcore_toolkit.formatting import to_markdown
11
12
  from apcore_toolkit.openapi import (
@@ -32,10 +33,23 @@ from apcore_toolkit.output.verifiers import (
32
33
  from apcore_toolkit.output.yaml_writer import YAMLWriter
33
34
  from apcore_toolkit.pydantic_utils import flatten_pydantic_params, resolve_target
34
35
  from apcore_toolkit.convention_scanner import ConventionScanner
35
- from apcore_toolkit.scanner import BaseScanner
36
+ from apcore_toolkit.http_verb_map import (
37
+ SCANNER_VERB_MAP,
38
+ extract_path_param_names,
39
+ generate_suggested_alias,
40
+ has_path_params,
41
+ resolve_http_verb,
42
+ substitute_path_params,
43
+ )
44
+ from apcore_toolkit.scanner import (
45
+ BaseScanner,
46
+ deduplicate_ids,
47
+ filter_modules,
48
+ infer_annotations_from_method,
49
+ )
36
50
  from apcore_toolkit.schema_utils import enrich_schema_descriptions
37
51
  from apcore_toolkit.serializers import annotations_to_dict, module_to_dict, modules_to_dicts
38
- from apcore_toolkit.types import ScannedModule
52
+ from apcore_toolkit.types import ScannedModule, clone_module, create_scanned_module
39
53
 
40
54
  try:
41
55
  __version__ = _get_version("apcore-toolkit")
@@ -44,6 +58,8 @@ except PackageNotFoundError:
44
58
 
45
59
  __all__ = [
46
60
  "AIEnhancer",
61
+ "BindingLoadError",
62
+ "BindingLoader",
47
63
  "DisplayResolver",
48
64
  "BaseScanner",
49
65
  "ConventionScanner",
@@ -54,6 +70,7 @@ __all__ = [
54
70
  "PythonWriter",
55
71
  "RegistryVerifier",
56
72
  "RegistryWriter",
73
+ "SCANNER_VERB_MAP",
57
74
  "ScannedModule",
58
75
  "SyntaxVerifier",
59
76
  "Verifier",
@@ -63,17 +80,27 @@ __all__ = [
63
80
  "YAMLVerifier",
64
81
  "YAMLWriter",
65
82
  "annotations_to_dict",
83
+ "clone_module",
84
+ "create_scanned_module",
85
+ "deduplicate_ids",
66
86
  "deep_resolve_refs",
67
87
  "enrich_schema_descriptions",
68
88
  "extract_input_schema",
69
89
  "extract_output_schema",
90
+ "extract_path_param_names",
91
+ "filter_modules",
70
92
  "flatten_pydantic_params",
93
+ "generate_suggested_alias",
71
94
  "get_writer",
95
+ "has_path_params",
96
+ "infer_annotations_from_method",
72
97
  "module_to_dict",
73
98
  "modules_to_dicts",
99
+ "resolve_http_verb",
74
100
  "resolve_ref",
75
101
  "resolve_schema",
76
102
  "resolve_target",
77
103
  "run_verifier_chain",
104
+ "substitute_path_params",
78
105
  "to_markdown",
79
106
  ]
@@ -0,0 +1,19 @@
1
+ """Shared Python ↔ JSON Schema type vocabulary.
2
+
3
+ Single source of truth for the 6-type mapping used by ConventionScanner
4
+ (Python→JSON Schema) and PythonWriter (JSON Schema→Python). Adding a new
5
+ type here propagates to both automatically.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ PYTHON_TO_JSON_SCHEMA: dict[str, str] = {
11
+ "str": "string",
12
+ "int": "integer",
13
+ "float": "number",
14
+ "bool": "boolean",
15
+ "list": "array",
16
+ "dict": "object",
17
+ }
18
+
19
+ JSON_SCHEMA_TO_PYTHON: dict[str, str] = {v: k for k, v in PYTHON_TO_JSON_SCHEMA.items()}
@@ -111,6 +111,11 @@ class AIEnhancer:
111
111
  timeout: int | None = None,
112
112
  ) -> None:
113
113
  self.endpoint = endpoint or os.environ.get("APCORE_AI_ENDPOINT", _DEFAULT_ENDPOINT)
114
+ from urllib.parse import urlparse as _urlparse
115
+
116
+ _parsed = _urlparse(self.endpoint)
117
+ if _parsed.scheme not in ("http", "https"):
118
+ raise ValueError(f"APCORE_AI_ENDPOINT must use http or https scheme, got: {self.endpoint!r}")
114
119
  self.model = model or os.environ.get("APCORE_AI_MODEL", _DEFAULT_MODEL)
115
120
  self.threshold = (
116
121
  threshold if threshold is not None else self._parse_float_env("APCORE_AI_THRESHOLD", _DEFAULT_THRESHOLD)
@@ -158,10 +163,13 @@ class AIEnhancer:
158
163
  For each module, identifies missing fields and calls the SLM to
159
164
  generate them. Only fields above the confidence threshold are applied.
160
165
 
161
- Modules with gaps are collected into batches of ``batch_size``
162
- (configured via ``APCORE_AI_BATCH_SIZE``, default 5). Each batch
163
- shares a single prompt/API call where possible, reducing round-trips.
164
- When batch_size is 1, behaviour is identical to per-module processing.
166
+ ``batch_size`` (configured via ``APCORE_AI_BATCH_SIZE``, default 5)
167
+ currently controls only the outer iteration granularity — each
168
+ module still produces its own prompt and API call. The setting
169
+ is retained so a future implementation can coalesce prompts
170
+ without changing the caller-facing configuration. When
171
+ ``batch_size`` is 1, behaviour is identical to per-module
172
+ processing. **It does not currently reduce API round-trips.**
165
173
 
166
174
  Args:
167
175
  modules: List of ScannedModule instances (post-scan).
@@ -182,15 +190,13 @@ class AIEnhancer:
182
190
  results.append(module)
183
191
  pending.append((idx, module, gaps))
184
192
 
185
- # Process pending modules in batches
186
- for batch_start in range(0, len(pending), self.batch_size):
187
- batch = pending[batch_start : batch_start + self.batch_size]
188
- for idx, module, gaps in batch:
189
- try:
190
- enhanced = self._enhance_module(module, gaps)
191
- results[idx] = enhanced
192
- except Exception:
193
- logger.warning("AI enhancement failed for %s, keeping original", module.module_id, exc_info=True)
193
+ # TODO: coalesce batch_size modules into a single API call to reduce round-trips
194
+ for idx, module, gaps in pending:
195
+ try:
196
+ enhanced = self._enhance_module(module, gaps)
197
+ results[idx] = enhanced
198
+ except Exception:
199
+ logger.error("AI enhancement failed for %s, keeping original", module.module_id, exc_info=True)
194
200
 
195
201
  return results
196
202
 
@@ -217,35 +223,63 @@ class AIEnhancer:
217
223
  confidence: dict[str, float] = {}
218
224
  warnings: list[str] = list(module.warnings)
219
225
 
220
- # Apply description if above threshold
221
- if "description" in gaps and "description" in parsed:
222
- desc_conf = parsed.get("confidence", {}).get("description", 0.0)
223
- confidence["description"] = desc_conf
224
- if desc_conf >= self.threshold:
225
- updates["description"] = parsed["description"]
226
+ # Guard: SLM may return confidence as a non-dict (e.g. "high" or 1).
227
+ # Treat any non-dict value as absent — all fields default to 0.0.
228
+ confidence_raw = parsed.get("confidence")
229
+ if confidence_raw is not None and not isinstance(confidence_raw, dict):
230
+ logger.warning(
231
+ "Module '%s': SLM returned non-dict 'confidence' (%s) — treating as absent.",
232
+ module.module_id,
233
+ type(confidence_raw).__name__,
234
+ )
235
+ confidence_parsed: dict[str, Any] = confidence_raw if isinstance(confidence_raw, dict) else {}
236
+
237
+ def _apply_simple(field: str) -> None:
238
+ """Apply a simple scalar field from parsed SLM output if confidence is sufficient."""
239
+ if field not in gaps or field not in parsed:
240
+ return
241
+ raw_conf = confidence_parsed.get(field, 0.0)
242
+ if not isinstance(raw_conf, (int, float)) or isinstance(raw_conf, bool):
243
+ logger.warning(
244
+ "Module '%s': non-numeric confidence for %r (%r) — treating as 0.0",
245
+ module.module_id,
246
+ field,
247
+ raw_conf,
248
+ )
249
+ field_conf: float = 0.0
226
250
  else:
227
- warnings.append(f"Low confidence ({desc_conf:.2f}) for description — skipped. Review manually.")
228
-
229
- # Apply documentation if above threshold
230
- if "documentation" in gaps and "documentation" in parsed:
231
- doc_conf = parsed.get("confidence", {}).get("documentation", 0.0)
232
- confidence["documentation"] = doc_conf
233
- if doc_conf >= self.threshold:
234
- updates["documentation"] = parsed["documentation"]
251
+ field_conf = float(raw_conf)
252
+ confidence[field] = field_conf
253
+ if field_conf >= self.threshold:
254
+ updates[field] = parsed[field]
235
255
  else:
236
- warnings.append(f"Low confidence ({doc_conf:.2f}) for documentation — skipped. Review manually.")
256
+ warnings.append(f"Low confidence ({field_conf:.2f}) for {field} — skipped. Review manually.")
257
+
258
+ _apply_simple("description")
259
+ _apply_simple("documentation")
237
260
 
238
261
  # Apply annotations if above threshold. Field set is derived from
239
262
  # ModuleAnnotations at import time, so adding new fields upstream
240
263
  # automatically widens what the SLM may populate (extra excluded).
241
264
  if "annotations" in gaps and "annotations" in parsed and isinstance(parsed["annotations"], dict):
242
265
  ann_data = parsed["annotations"]
243
- ann_conf = parsed.get("confidence", {})
244
266
  accepted: dict[str, Any] = {}
245
267
  for field_name, validate in _ANNOTATION_FIELD_VALIDATORS.items():
246
268
  if field_name not in ann_data or not validate(ann_data[field_name]):
247
269
  continue
248
- field_conf = ann_conf.get(f"annotations.{field_name}", ann_conf.get(field_name, 0.0))
270
+ raw_ann_conf = confidence_parsed.get(
271
+ f"annotations.{field_name}", confidence_parsed.get(field_name, 0.0)
272
+ )
273
+ if not isinstance(raw_ann_conf, (int, float)) or isinstance(raw_ann_conf, bool):
274
+ logger.warning(
275
+ "Module '%s': non-numeric confidence for 'annotations.%s' (%r) — treating as 0.0",
276
+ module.module_id,
277
+ field_name,
278
+ raw_ann_conf,
279
+ )
280
+ field_conf = 0.0
281
+ else:
282
+ field_conf = float(raw_ann_conf)
249
283
  confidence[f"annotations.{field_name}"] = field_conf
250
284
  if field_conf >= self.threshold:
251
285
  accepted[field_name] = ann_data[field_name]
@@ -257,14 +291,7 @@ class AIEnhancer:
257
291
  base = module.annotations or DEFAULT_ANNOTATIONS
258
292
  updates["annotations"] = replace(base, **accepted)
259
293
 
260
- # Apply input_schema if above threshold
261
- if "input_schema" in gaps and "input_schema" in parsed:
262
- schema_conf = parsed.get("confidence", {}).get("input_schema", 0.0)
263
- confidence["input_schema"] = schema_conf
264
- if schema_conf >= self.threshold:
265
- updates["input_schema"] = parsed["input_schema"]
266
- else:
267
- warnings.append(f"Low confidence ({schema_conf:.2f}) for input_schema — skipped. Review manually.")
294
+ _apply_simple("input_schema")
268
295
 
269
296
  if not updates:
270
297
  return replace(module, warnings=warnings) if warnings != module.warnings else module
@@ -314,8 +341,23 @@ class AIEnhancer:
314
341
  if "input_schema" in gaps:
315
342
  parts.append(' "input_schema": <JSON Schema object for function parameters>,')
316
343
 
344
+ # Build confidence keys dynamically from gaps so the SLM is told to
345
+ # supply confidence for every field it is being asked to fill. The
346
+ # read side (_enhance_module) looks up annotations.<name> per-field
347
+ # keys via _ANNOTATION_FIELD_VALIDATORS; keep the prompt and the
348
+ # read logic symmetric by enumerating the same validator set.
349
+ confidence_keys: list[str] = []
350
+ if "description" in gaps:
351
+ confidence_keys.append("description")
352
+ if "documentation" in gaps:
353
+ confidence_keys.append("documentation")
354
+ if "input_schema" in gaps:
355
+ confidence_keys.append("input_schema")
356
+ if "annotations" in gaps:
357
+ confidence_keys.extend(f"annotations.{name}" for name in _ANNOTATION_FIELD_VALIDATORS)
358
+
317
359
  parts.append(' "confidence": {')
318
- parts.append(' "description": 0.0, "documentation": 0.0')
360
+ parts.append(" " + ", ".join(f'"{k}": 0.0' for k in confidence_keys))
319
361
  parts.append(" }")
320
362
  parts.append("}")
321
363
  parts.append("")
@@ -369,6 +411,9 @@ class AIEnhancer:
369
411
  text = "\n".join(lines)
370
412
 
371
413
  try:
372
- return json.loads(text)
414
+ result = json.loads(text)
373
415
  except json.JSONDecodeError as exc:
374
416
  raise ValueError(f"SLM returned invalid JSON: {exc}") from exc
417
+ if not isinstance(result, dict):
418
+ raise ValueError(f"SLM returned non-dict JSON ({type(result).__name__}); expected a JSON object")
419
+ return result