pydantic-fixturegen 1.0.0__py3-none-any.whl


Potentially problematic release: this version of pydantic-fixturegen has been flagged by the registry scanner.
Files changed (41)
  1. pydantic_fixturegen/__init__.py +7 -0
  2. pydantic_fixturegen/cli/__init__.py +85 -0
  3. pydantic_fixturegen/cli/doctor.py +235 -0
  4. pydantic_fixturegen/cli/gen/__init__.py +23 -0
  5. pydantic_fixturegen/cli/gen/_common.py +139 -0
  6. pydantic_fixturegen/cli/gen/explain.py +145 -0
  7. pydantic_fixturegen/cli/gen/fixtures.py +283 -0
  8. pydantic_fixturegen/cli/gen/json.py +262 -0
  9. pydantic_fixturegen/cli/gen/schema.py +164 -0
  10. pydantic_fixturegen/cli/list.py +164 -0
  11. pydantic_fixturegen/core/__init__.py +103 -0
  12. pydantic_fixturegen/core/ast_discover.py +169 -0
  13. pydantic_fixturegen/core/config.py +440 -0
  14. pydantic_fixturegen/core/errors.py +136 -0
  15. pydantic_fixturegen/core/generate.py +311 -0
  16. pydantic_fixturegen/core/introspect.py +141 -0
  17. pydantic_fixturegen/core/io_utils.py +77 -0
  18. pydantic_fixturegen/core/providers/__init__.py +32 -0
  19. pydantic_fixturegen/core/providers/collections.py +74 -0
  20. pydantic_fixturegen/core/providers/identifiers.py +68 -0
  21. pydantic_fixturegen/core/providers/numbers.py +133 -0
  22. pydantic_fixturegen/core/providers/registry.py +98 -0
  23. pydantic_fixturegen/core/providers/strings.py +109 -0
  24. pydantic_fixturegen/core/providers/temporal.py +42 -0
  25. pydantic_fixturegen/core/safe_import.py +403 -0
  26. pydantic_fixturegen/core/schema.py +320 -0
  27. pydantic_fixturegen/core/seed.py +154 -0
  28. pydantic_fixturegen/core/strategies.py +193 -0
  29. pydantic_fixturegen/core/version.py +52 -0
  30. pydantic_fixturegen/emitters/__init__.py +15 -0
  31. pydantic_fixturegen/emitters/json_out.py +373 -0
  32. pydantic_fixturegen/emitters/pytest_codegen.py +365 -0
  33. pydantic_fixturegen/emitters/schema_out.py +84 -0
  34. pydantic_fixturegen/plugins/builtin.py +45 -0
  35. pydantic_fixturegen/plugins/hookspecs.py +59 -0
  36. pydantic_fixturegen/plugins/loader.py +72 -0
  37. pydantic_fixturegen-1.0.0.dist-info/METADATA +280 -0
  38. pydantic_fixturegen-1.0.0.dist-info/RECORD +41 -0
  39. pydantic_fixturegen-1.0.0.dist-info/WHEEL +4 -0
  40. pydantic_fixturegen-1.0.0.dist-info/entry_points.txt +5 -0
  41. pydantic_fixturegen-1.0.0.dist-info/licenses/LICENSE +21 -0
pydantic_fixturegen/core/version.py
@@ -0,0 +1,52 @@
+"""Helpers for interrogating tool version metadata and formatting artifact headers."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping, Sequence
+from functools import lru_cache
+from importlib import metadata
+from typing import Any
+
+_PACKAGE_NAME = "pydantic-fixturegen"
+_HEADER_PREFIX = "Generated by pydantic-fixturegen"
+
+
+@lru_cache(maxsize=1)
+def get_tool_version() -> str:
+    """Return the installed package version.
+
+    Falls back to a development tag if the distribution metadata is missing.
+    """
+    try:
+        return metadata.version(_PACKAGE_NAME)
+    except metadata.PackageNotFoundError:
+        return "0.0.0+dev"
+
+
+def build_artifact_header(
+    *,
+    seed: Any | None,
+    model_digest: str | None,
+    extras: Mapping[str, Any] | Sequence[tuple[str, Any]] | None = None,
+) -> str:
+    """Compose a standardized header string for generated artifacts.
+
+    Args:
+        seed: Seed value that drove deterministic generation (None if unknown).
+        model_digest: Hash or identifier describing the model graph.
+        extras: Optional additional key/value metadata to include.
+    """
+    version = get_tool_version()
+    segments: list[str] = [
+        f"{_HEADER_PREFIX} v{version}",
+        f"seed={seed if seed is not None else 'unknown'}",
+        f"model-digest={model_digest or 'unknown'}",
+    ]
+
+    if extras:
+        items = extras.items() if isinstance(extras, Mapping) else extras
+
+        for key, value in sorted(items):
+            segments.append(f"{key}={value}")
+
+    return " | ".join(segments)
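Usage sketch (not part of the diff): given the signatures above, the two helpers compose like this. The seed and digest values are invented for illustration; the formatted output follows directly from the segment list and " | " join shown in the source.

from pydantic_fixturegen.core.version import build_artifact_header, get_tool_version

print(get_tool_version())  # "1.0.0" for this release; "0.0.0+dev" without metadata

header = build_artifact_header(
    seed=42,                        # hypothetical seed
    model_digest="sha256:abc123",   # hypothetical digest
    extras={"models": 3},           # extras are appended in sorted key order
)
# "Generated by pydantic-fixturegen v1.0.0 | seed=42 | model-digest=sha256:abc123 | models=3"
print(header)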
pydantic_fixturegen/emitters/__init__.py
@@ -0,0 +1,15 @@
+"""Emitters for producing artifacts from generated instances."""
+
+from .json_out import JsonEmitConfig, emit_json_samples
+from .pytest_codegen import PytestEmitConfig, emit_pytest_fixtures
+from .schema_out import SchemaEmitConfig, emit_model_schema, emit_models_schema
+
+__all__ = [
+    "JsonEmitConfig",
+    "SchemaEmitConfig",
+    "PytestEmitConfig",
+    "emit_json_samples",
+    "emit_model_schema",
+    "emit_models_schema",
+    "emit_pytest_fixtures",
+]
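For orientation, a consumer imports the emitter API from the package root, matching the re-exports above (a sketch assuming the package layout in the file list):

from pydantic_fixturegen.emitters import (
    JsonEmitConfig,
    PytestEmitConfig,
    SchemaEmitConfig,
    emit_json_samples,
    emit_model_schema,
    emit_models_schema,
    emit_pytest_fixtures,
)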
pydantic_fixturegen/emitters/json_out.py
@@ -0,0 +1,373 @@
+"""Utilities for emitting generated instances to JSON/JSONL files."""
+
+from __future__ import annotations
+
+import dataclasses
+import json
+from collections.abc import Callable, Iterable, Iterator, Sequence
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from itertools import islice
+from pathlib import Path
+from types import ModuleType
+from typing import Any, cast
+
+from pydantic import BaseModel
+
+orjson: ModuleType | None
+try:  # Optional dependency
+    import orjson as _orjson
+except ImportError:  # pragma: no cover - optional extra not installed
+    orjson = None
+else:
+    orjson = _orjson
+
+DEFAULT_INDENT = 2
+DEFAULT_SHARD_PAD = 5
+
+
+@dataclass(slots=True)
+class JsonEmitConfig:
+    """Configuration options for JSON emission."""
+
+    output_path: Path
+    count: int
+    jsonl: bool = False
+    indent: int | None = DEFAULT_INDENT
+    shard_size: int | None = None
+    use_orjson: bool = False
+    ensure_ascii: bool = False
+    max_workers: int | None = None
+
+
+def emit_json_samples(
+    samples: Iterable[Any] | Callable[[], Any],
+    *,
+    output_path: str | Path,
+    count: int,
+    jsonl: bool = False,
+    indent: int | None = DEFAULT_INDENT,
+    shard_size: int | None = None,
+    use_orjson: bool = False,
+    ensure_ascii: bool = False,
+    max_workers: int | None = None,
+) -> list[Path]:
+    """Emit generated samples to JSON or JSONL files.
+
+    Args:
+        samples: Iterable of pre-generated items or callable producing a new item
+            when invoked (used ``count`` times).
+        output_path: Target file path (single file) or stem used for sharded
+            outputs. File suffix is normalised based on ``jsonl``.
+        count: Number of samples to write.
+        jsonl: Emit newline-delimited JSON instead of a JSON array.
+        indent: Indentation level (``0``/``None`` -> compact). For JSONL it is
+            ignored.
+        shard_size: Maximum number of records per shard. ``None`` or ``<= 0``
+            emits a single file.
+        use_orjson: Serialise with orjson when available for performance.
+        ensure_ascii: Force ASCII-only output when using the stdlib encoder.
+        max_workers: Optional worker cap for concurrent shard writes.
+
+    Returns:
+        List of ``Path`` objects for the created file(s), ordered by shard index.
+    """
+
+    config = JsonEmitConfig(
+        output_path=Path(output_path),
+        count=count,
+        jsonl=jsonl,
+        indent=_normalise_indent(indent, jsonl=jsonl),
+        shard_size=_normalise_shard_size(shard_size, count),
+        use_orjson=use_orjson,
+        ensure_ascii=ensure_ascii,
+        max_workers=max_workers,
+    )
+    encoder = _JsonEncoder(
+        indent=config.indent,
+        ensure_ascii=config.ensure_ascii,
+        use_orjson=config.use_orjson,
+    )
+
+    samples_iter = _collect_samples(samples, config.count)
+
+    if config.shard_size is None:
+        if config.jsonl:
+            path = _stream_jsonl(
+                samples_iter,
+                config.output_path,
+                encoder,
+            )
+        else:
+            path = _stream_json_array(
+                samples_iter,
+                config.output_path,
+                encoder,
+                indent=config.indent,
+            )
+        return [path]
+
+    return _write_chunked_samples(samples_iter, config, encoder)
+
+
+# --------------------------------------------------------------------------- helpers
+def _collect_samples(
+    samples: Iterable[Any] | Callable[[], Any],
+    count: int,
+) -> Iterator[Any]:
+    if count <= 0:
+        return iter(())
+
+    if callable(samples):
+
+        def factory_iterator() -> Iterator[Any]:
+            for _ in range(count):
+                yield _normalise_record(samples())
+
+        return factory_iterator()
+
+    def iterable_iterator() -> Iterator[Any]:
+        for yielded, item in enumerate(samples):
+            if yielded >= count:
+                break
+            yield _normalise_record(item)
+
+    return iterable_iterator()
+
+
+def _normalise_indent(indent: int | None, *, jsonl: bool) -> int | None:
+    if jsonl:
+        return None
+    if indent is None or indent == 0:
+        return None
+    if indent < 0:
+        raise ValueError("indent must be >= 0")
+    return indent
+
+
+def _normalise_shard_size(shard_size: int | None, count: int) -> int | None:
+    if shard_size is None or shard_size <= 0:
+        return None
+    return max(1, min(shard_size, count)) if count > 0 else shard_size
+
+
+def _worker_count(max_workers: int | None, shard_count: int) -> int:
+    if shard_count <= 1:
+        return 1
+    if max_workers is not None:
+        return max(1, min(max_workers, shard_count))
+    return min(shard_count, (os_cpu_count() or 1) * 2)
+
+
+def _write_empty_shard(
+    base_path: Path,
+    jsonl: bool,
+    encoder: _JsonEncoder,
+) -> Path:
+    path = _shard_path(base_path, 1, 1, jsonl)
+    empty_payload = "" if jsonl else encoder.encode([])
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(empty_payload, encoding="utf-8")
+    return path
+
+
+def _prepare_payload(
+    chunk: Sequence[Any],
+    *,
+    jsonl: bool,
+    encoder: _JsonEncoder,
+    workers: int,
+) -> str:
+    if not jsonl:
+        return encoder.encode(list(chunk))
+
+    if workers <= 1:
+        lines = [encoder.encode(item) for item in chunk]
+    else:
+        with ThreadPoolExecutor(max_workers=workers) as executor:
+            lines = list(executor.map(encoder.encode, chunk))
+    return "\n".join(lines) + ("\n" if lines else "")
+
+
+def _stream_jsonl(
+    iterator: Iterator[Any],
+    base_path: Path,
+    encoder: _JsonEncoder,
+) -> Path:
+    path = _ensure_suffix(base_path, ".jsonl")
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8") as stream:
+        for record in iterator:
+            stream.write(encoder.encode(record))
+            stream.write("\n")
+    return path
+
+
+def _stream_json_array(
+    iterator: Iterator[Any],
+    base_path: Path,
+    encoder: _JsonEncoder,
+    *,
+    indent: int | None,
+) -> Path:
+    path = _ensure_suffix(base_path, ".json")
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    if indent is None:
+        with path.open("w", encoding="utf-8") as stream:
+            first = True
+            stream.write("[")
+            for record in iterator:
+                if not first:
+                    stream.write(",")
+                stream.write(encoder.encode(record))
+                first = False
+            stream.write("]")
+        return path
+
+    spacing = " " * indent
+    with path.open("w", encoding="utf-8") as stream:
+        written = False
+        for record in iterator:
+            encoded = encoder.encode(record)
+            if not written:
+                stream.write("[\n")
+            else:
+                stream.write(",\n")
+            stream.write(f"{spacing}{encoded}")
+            written = True
+        if not written:
+            stream.write("[]")
+        else:
+            stream.write("\n]")
+    return path
+
+
+def _write_chunked_samples(
+    iterator: Iterator[Any],
+    config: JsonEmitConfig,
+    encoder: _JsonEncoder,
+) -> list[Path]:
+    chunk_size = max(1, config.shard_size or 1)
+    results: list[Path] = []
+
+    chunk = list(islice(iterator, chunk_size))
+    if not chunk:
+        results.append(_write_empty_shard(config.output_path, config.jsonl, encoder))
+        return results
+
+    index = 1
+    while chunk:
+        next_chunk = list(islice(iterator, chunk_size))
+        is_last = not next_chunk
+        path = _chunk_path(
+            config.output_path,
+            index=index,
+            is_last=is_last,
+            jsonl=config.jsonl,
+        )
+        payload = _prepare_payload(
+            chunk,
+            jsonl=config.jsonl,
+            encoder=encoder,
+            workers=_worker_count(config.max_workers, len(chunk)),
+        )
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(payload, encoding="utf-8")
+        results.append(path)
+
+        chunk = next_chunk
+        index += 1
+
+    return results
+
+
+def _chunk_path(
+    base_path: Path,
+    *,
+    index: int,
+    is_last: bool,
+    jsonl: bool,
+) -> Path:
+    suffix = ".jsonl" if jsonl else ".json"
+    if is_last and index == 1:
+        return _ensure_suffix(base_path, suffix)
+
+    shard_total = 2 if (index > 1 or not is_last) else 1
+    return _shard_path(base_path, index, shard_total, jsonl)
+
+
+def _shard_path(base_path: Path, shard_index: int, shard_count: int, jsonl: bool) -> Path:
+    suffix = ".jsonl" if jsonl else ".json"
+    if shard_count <= 1:
+        return _ensure_suffix(base_path, suffix)
+    stem = base_path.stem or base_path.name
+    parent = base_path.parent
+    return parent / f"{stem}-{shard_index:0{DEFAULT_SHARD_PAD}d}{suffix}"
+
+
+def _ensure_suffix(path: Path, suffix: str) -> Path:
+    if path.suffix:
+        return path.with_suffix(suffix)
+    return path.with_name(f"{path.name}{suffix}")
+
+
+def _normalise_record(record: Any) -> Any:
+    if dataclasses.is_dataclass(record) and not isinstance(record, type):
+        return dataclasses.asdict(record)
+    if isinstance(record, BaseModel):
+        return record.model_dump()
+    model_dump = getattr(record, "model_dump", None)
+    if callable(model_dump):
+        dump_call = cast(Callable[[], Any], model_dump)
+        return dump_call()
+    return record
+
+
+class _JsonEncoder:
+    def __init__(self, *, indent: int | None, ensure_ascii: bool, use_orjson: bool) -> None:
+        self.indent = indent
+        self.ensure_ascii = ensure_ascii
+        self.use_orjson = use_orjson
+        self._options: int | None = None
+        if use_orjson:
+            if orjson is None:
+                raise RuntimeError("orjson is not installed but use_orjson was requested.")
+            self._options = _orjson_options(indent)
+
+    def encode(self, obj: Any) -> str:
+        normalized = _normalise_record(obj)
+        if self.use_orjson:
+            assert orjson is not None  # for type checkers
+            options = self._options if self._options is not None else 0
+            bytes_payload = orjson.dumps(normalized, option=options)
+            return cast(bytes, bytes_payload).decode("utf-8")
+        return json.dumps(
+            normalized,
+            ensure_ascii=self.ensure_ascii,
+            indent=self.indent,
+            sort_keys=True,
+        )
+
+
+def _orjson_options(indent: int | None) -> int:
+    if orjson is None:  # pragma: no cover - defensive
+        raise RuntimeError("orjson is not available")
+    options = cast(int, orjson.OPT_SORT_KEYS)
+    if indent:
+        if indent != 2:
+            raise ValueError("orjson only supports indent=2.")
+        options |= cast(int, orjson.OPT_INDENT_2)
+    return options
+
+
+def os_cpu_count() -> int | None:
+    try:
+        import os
+
+        return os.cpu_count()
+    except (ImportError, AttributeError):  # pragma: no cover - fallback
+        return None
+
+
+__all__ = ["JsonEmitConfig", "emit_json_samples"]
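Usage sketch (not part of the diff): driving emit_json_samples with a factory callable, per the docstring above. The User model and output paths are invented for illustration; shard names follow the zero-padded pattern from _shard_path and DEFAULT_SHARD_PAD.

from pathlib import Path

from pydantic import BaseModel

from pydantic_fixturegen.emitters import emit_json_samples


class User(BaseModel):  # hypothetical model
    id: int
    name: str


counter = iter(range(1_000_000))


def make_user() -> User:
    # Factory invoked once per sample; records are normalised via model_dump().
    n = next(counter)
    return User(id=n, name=f"user-{n}")


# Single JSON array file: users.json (default indent=2)
emit_json_samples(make_user, output_path=Path("users.json"), count=10)

# Sharded JSONL: 250 records in chunks of 100 yields
# users-00001.jsonl, users-00002.jsonl, users-00003.jsonl
paths = emit_json_samples(
    make_user,
    output_path=Path("users.jsonl"),
    count=250,
    jsonl=True,
    shard_size=100,
)
print([p.name for p in paths])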