fauxdata-cli 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. fauxdata_cli-0.1.2/.coverage +0 -0
  2. fauxdata_cli-0.1.2/LICENSE +21 -0
  3. fauxdata_cli-0.1.2/LOG.md +34 -0
  4. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/PKG-INFO +2 -2
  5. fauxdata_cli-0.1.2/docs/deployment.md +90 -0
  6. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/docs/index.html +31 -5
  7. fauxdata_cli-0.1.2/docs/share.png +0 -0
  8. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/pyproject.toml +11 -2
  9. fauxdata_cli-0.1.2/share.png +0 -0
  10. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/__init__.py +1 -1
  11. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/commands/generate.py +12 -2
  12. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/generator.py +14 -6
  13. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/main.py +15 -1
  14. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/schema.py +8 -0
  15. fauxdata_cli-0.1.2/tests/__init__.py +0 -0
  16. fauxdata_cli-0.1.2/tests/conftest.py +39 -0
  17. fauxdata_cli-0.1.2/tests/test_cli.py +112 -0
  18. fauxdata_cli-0.1.2/tests/test_generator.py +102 -0
  19. fauxdata_cli-0.1.2/tests/test_new_fields.py +167 -0
  20. fauxdata_cli-0.1.2/tests/test_output.py +107 -0
  21. fauxdata_cli-0.1.2/tests/test_schema.py +147 -0
  22. fauxdata_cli-0.1.2/tests/test_validator.py +123 -0
  23. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/uv.lock +220 -25
  24. fauxdata_cli-0.1.1/LOG.md +0 -10
  25. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/.claude/settings.local.json +0 -0
  26. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/.gitignore +0 -0
  27. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/.python-version +0 -0
  28. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/README.md +0 -0
  29. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/schemas/events.yml +0 -0
  30. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/schemas/orders.yml +0 -0
  31. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/schemas/people.yml +0 -0
  32. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/commands/__init__.py +0 -0
  33. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/commands/init.py +0 -0
  34. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/commands/preview.py +0 -0
  35. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/commands/validate.py +0 -0
  36. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/output.py +0 -0
  37. {fauxdata_cli-0.1.1 → fauxdata_cli-0.1.2}/src/fauxdata/validator.py +0 -0
Binary file
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Andrea Borruso <aborruso@gmail.com>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,34 @@
1
+ # Log
2
+
3
+ ## 2026-03-06 — v0.1.2
4
+
5
+ - Bump to 0.1.2 and publish to PyPI
6
+
7
+ ## 2026-03-06 (feature)
8
+
9
+ - `--version` / `-V` flag in the CLI (`fauxdata --version` → `fauxdata 0.1.1`)
10
+ - Coverage threshold 80% in pytest config (`--cov-fail-under=80`); current: 83.76%
11
+ - New `pattern` field in ColumnSchema: generates strings that match a regex via pointblank
12
+ - New `null_probability` field in ColumnSchema: fine-grained control of nulls (0.0–1.0), validated at parse time
13
+ - Removed the `faker` dependency (unused; pointblank handles everything)
14
+ - Fix generator: `null_probability=None` is no longer passed to pointblank (it caused a TypeError)
15
+ - Tests updated: 79/79 pass
16
+
17
+ ## 2026-03-06 (tests)
18
+
19
+ - Add pytest test suite: 65 tests, 100% pass, 0.44s
20
+ - `tests/test_schema.py`: unit tests for YAML schema parsing (valid/invalid cases)
21
+ - `tests/test_output.py`: unit tests for export functions (all formats, stdout, errors)
22
+ - `tests/test_generator.py`: integration tests for generation (types, seed, unique, presets)
23
+ - `tests/test_validator.py`: integration tests for validation rules (pass/fail scenarios)
24
+ - `tests/test_cli.py`: CLI smoke tests via `typer.testing.CliRunner`
25
+ - Add `[dependency-groups] dev` in `pyproject.toml` (pytest, pytest-cov); config via `[tool.pytest.ini_options]`
26
+
27
+ ## 2026-03-06
28
+
29
+ - Initial implementation of `fauxdata` CLI
30
+ - Stack: pointblank 0.22 (native generation + validation), polars, typer, rich, pyfiglet, questionary
31
+ - Commands: `init`, `generate`, `validate`, `preview`
32
+ - Example schemas: `people.yml`, `orders.yml`, `events.yml`
33
+ - All schemas generate and validate cleanly (all rules PASS)
34
+ - `locale` field at schema level maps to pointblank `country=` param
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fauxdata-cli
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: CLI for generating and validating fake datasets
5
5
  Project-URL: Homepage, https://aborruso.github.io/fauxdata/
6
6
  Project-URL: Repository, https://github.com/aborruso/fauxdata
7
7
  Project-URL: Bug Tracker, https://github.com/aborruso/fauxdata/issues
8
+ License-File: LICENSE
8
9
  Requires-Python: >=3.11
9
- Requires-Dist: faker>=26.0
10
10
  Requires-Dist: pointblank>=0.22
11
11
  Requires-Dist: polars>=1.0
12
12
  Requires-Dist: pyfiglet>=1.0
@@ -0,0 +1,90 @@
1
+ # Deployment rules
2
+
3
+ ## Pre-release checklist (always)
4
+
5
+ 1. Run tests locally — must all pass:
6
+
7
+ ```bash
8
+ uv run pytest
9
+ ```
10
+
11
+ Coverage must stay at or above 80% (enforced by `--cov-fail-under=80` in `pyproject.toml`). If it drops, fix before proceeding.
12
+
13
+ 2. Bump version in **both**:
14
+ - `src/fauxdata/__init__.py` → `__version__ = "X.Y.Z"`
15
+ - `pyproject.toml` → `version = "X.Y.Z"`
16
+
17
+ 3. Update `LOG.md` with a summary of changes under a new date heading.
18
+
19
+ ---
20
+
21
+ ## GitHub release (tag + release notes)
22
+
23
+ ```bash
24
+ # Create and push annotated tag
25
+ git tag -a vX.Y.Z -m "vX.Y.Z"
26
+ git push origin vX.Y.Z
27
+ ```
28
+
29
+ Then create a GitHub release via `gh`:
30
+
31
+ ```bash
32
+ gh release create vX.Y.Z \
33
+ --title "vX.Y.Z" \
34
+ --notes "$(cat <<'EOF'
35
+ ## What's new
36
+
37
+ - Short bullet list of user-facing changes
38
+ - Include new fields, commands, bug fixes
39
+
40
+ ## Breaking changes
41
+
42
+ - List any breaking changes here (or remove section if none)
43
+
44
+ ## Installation
45
+
46
+ \`\`\`bash
47
+ pip install fauxdata-cli==X.Y.Z
48
+ \`\`\`
49
+
50
+ Full changelog: https://github.com/aborruso/fauxdata/commits/vX.Y.Z
51
+ EOF
52
+ )"
53
+ ```
54
+
55
+ Release notes style: **concise, nerd-friendly, technical**. List the actual changes with enough detail that a developer understands what changed and why.
56
+
57
+ ---
58
+
59
+ ## PyPI publish (via twine)
60
+
61
+ ```bash
62
+ # Build
63
+ uv build
64
+
65
+ # Check the dist
66
+ twine check dist/*
67
+
68
+ # Publish
69
+ twine upload dist/*
70
+ ```
71
+
72
+ Requires `~/.pypirc` configured with a PyPI API token, or the `TWINE_USERNAME` (set to `__token__`) and `TWINE_PASSWORD` (set to the token value) environment variables.
73
+
74
+ ---
75
+
76
+ ## Order of operations
77
+
78
+ ```
79
+ uv run pytest # must pass 100%
80
+ bump version # __init__.py + pyproject.toml
81
+ update LOG.md
82
+ git commit + git push
83
+ git tag + git push tag
84
+ gh release create # with release notes
85
+ uv build
86
+ twine check dist/*
87
+ twine upload dist/*
88
+ ```
89
+
90
+ Never publish to PyPI without a corresponding GitHub release.
@@ -5,8 +5,32 @@
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
6
  <title>fauxdata — fake data, done right</title>
7
7
  <meta name="description" content="fauxdata is a CLI tool for generating and validating realistic fake datasets from YAML schemas. Locale-aware, pipeline-friendly, powered by pointblank.">
8
+ <meta name="keywords" content="fake data, synthetic data, dataset generator, CLI, YAML schema, pointblank, data testing, fake dataset, CSV generator, Parquet">
9
+ <meta name="author" content="Andrea Borruso">
10
+ <meta name="robots" content="index, follow">
11
+ <link rel="canonical" href="https://aborruso.github.io/fauxdata/">
12
+
13
+ <!-- Open Graph -->
14
+ <meta property="og:type" content="website">
15
+ <meta property="og:url" content="https://aborruso.github.io/fauxdata/">
8
16
  <meta property="og:title" content="fauxdata — fake data, done right">
9
17
  <meta property="og:description" content="Generate and validate realistic fake datasets from YAML schemas. Because fake data can actually be better than real data.">
18
+ <meta property="og:image" content="https://aborruso.github.io/fauxdata/share.png">
19
+ <meta property="og:image:width" content="1200">
20
+ <meta property="og:image:height" content="630">
21
+ <meta property="og:image:alt" content="fauxdata — CLI tool for generating realistic fake datasets">
22
+ <meta property="og:site_name" content="fauxdata">
23
+ <meta property="og:locale" content="en_US">
24
+
25
+ <!-- Twitter Card -->
26
+ <meta name="twitter:card" content="summary_large_image">
27
+ <meta name="twitter:url" content="https://aborruso.github.io/fauxdata/">
28
+ <meta name="twitter:title" content="fauxdata — fake data, done right">
29
+ <meta name="twitter:description" content="Generate and validate realistic fake datasets from YAML schemas. Because fake data can actually be better than real data.">
30
+ <meta name="twitter:image" content="https://aborruso.github.io/fauxdata/share.png">
31
+ <meta name="twitter:image:alt" content="fauxdata — CLI tool for generating realistic fake datasets">
32
+ <meta name="twitter:creator" content="@aborruso">
33
+
10
34
  <link rel="preconnect" href="https://fonts.googleapis.com">
11
35
  <link href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:ital,wght@0,300;0,400;0,600;0,700;1,400&family=VT323&display=swap" rel="stylesheet">
12
36
  <style>
@@ -876,11 +900,13 @@
876
900
  <span class="t-line dim" style="padding-left:2rem">| duckdb -c "SELECT status, COUNT(*) FROM '/dev/stdin' GROUP BY ALL"</span>
877
901
  <br>
878
902
  <span class="t-line t-out">┌───────────┬──────────┐</span>
879
- <span class="t-line t-out">│ status │ count(*) │</span>
880
- <span class="t-line t-out">│ delivered 3124 │</span>
881
- <span class="t-line t-out">│ shipped │ 2891 │</span>
882
- <span class="t-line t-out">│ pending 2003 │</span>
883
- <span class="t-line t-out">│ cancelled 1982 │</span>
903
+ <span class="t-line t-out">│ status │ count(*) │</span>
904
+ <span class="t-line t-out">│ varchar │ int64 │</span>
905
+ <span class="t-line t-out">├───────────┼──────────┤</span>
906
+ <span class="t-line t-out">│ delivered │ 3124 │</span>
907
+ <span class="t-line t-out">│ shipped │ 2891 │</span>
908
+ <span class="t-line t-out">│ pending │ 2003 │</span>
909
+ <span class="t-line t-out">│ cancelled │ 1982 │</span>
884
910
  <span class="t-line t-out">└───────────┴──────────┘</span>
885
911
  </div>
886
912
  </div>
Binary file
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "fauxdata-cli"
3
- version = "0.1.1"
3
+ version = "0.1.2"
4
4
  description = "CLI for generating and validating fake datasets"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -12,7 +12,6 @@ dependencies = [
12
12
  "questionary>=2.0",
13
13
  "polars>=1.0",
14
14
  "pyyaml>=6.0",
15
- "faker>=26.0",
16
15
  ]
17
16
 
18
17
  [project.urls]
@@ -23,6 +22,16 @@ Repository = "https://github.com/aborruso/fauxdata"
23
22
  [project.scripts]
24
23
  fauxdata = "fauxdata.main:app"
25
24
 
25
+ [dependency-groups]
26
+ dev = [
27
+ "pytest>=8.0",
28
+ "pytest-cov>=5.0",
29
+ ]
30
+
31
+ [tool.pytest.ini_options]
32
+ testpaths = ["tests"]
33
+ addopts = "--tb=short --cov=fauxdata --cov-report=term-missing --cov-fail-under=80"
34
+
26
35
  [build-system]
27
36
  requires = ["hatchling"]
28
37
  build-backend = "hatchling.build"
Binary file
@@ -1,3 +1,3 @@
1
1
  """fauxdata - CLI for generating and validating fake datasets."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.1.2"
@@ -61,13 +61,22 @@ def _print_schema_table(schema, n: int, seed):
61
61
  t = Table(title=f"Schema: {schema.name}", show_header=True, header_style="bold magenta")
62
62
  t.add_column("Column", style="cyan")
63
63
  t.add_column("Type")
64
- t.add_column("Preset/Values")
64
+ t.add_column("Preset/Pattern/Values")
65
65
  t.add_column("Min")
66
66
  t.add_column("Max")
67
67
  t.add_column("Unique")
68
+ t.add_column("Null%")
68
69
 
69
70
  for col in schema.columns:
70
- preset_val = col.preset or (str(col.values) if col.values else "-")
71
+ if col.pattern:
72
+ preset_val = f"pattern:{col.pattern}"
73
+ elif col.preset:
74
+ preset_val = col.preset
75
+ elif col.values:
76
+ preset_val = str(col.values)
77
+ else:
78
+ preset_val = "-"
79
+ null_pct = f"{int(col.null_probability * 100)}%" if col.null_probability else "-"
71
80
  t.add_row(
72
81
  col.name,
73
82
  col.col_type,
@@ -75,6 +84,7 @@ def _print_schema_table(schema, n: int, seed):
75
84
  str(col.min) if col.min is not None else "-",
76
85
  str(col.max) if col.max is not None else "-",
77
86
  "yes" if col.unique else "no",
87
+ null_pct,
78
88
  )
79
89
 
80
90
  console.print(t)
@@ -30,8 +30,10 @@ def _build_pb_schema(schema: SchemaConfig) -> pb.Schema:
30
30
 
31
31
  def _col_to_field(col: ColumnSchema):
32
32
  """Convert a ColumnSchema to a pointblank field spec."""
33
- nullable = col.nullable
33
+ nullable = col.nullable or (col.null_probability is not None and col.null_probability > 0)
34
34
  unique = col.unique
35
+ # Build optional kwargs only when null_probability is explicitly set
36
+ np_kwargs = {"null_probability": col.null_probability} if col.null_probability is not None else {}
35
37
 
36
38
  if col.col_type == "int":
37
39
  return pb.int_field(
@@ -39,6 +41,7 @@ def _col_to_field(col: ColumnSchema):
39
41
  max_val=int(col.max) if col.max is not None else None,
40
42
  nullable=nullable,
41
43
  unique=unique,
44
+ **np_kwargs,
42
45
  )
43
46
 
44
47
  elif col.col_type == "float":
@@ -47,10 +50,11 @@ def _col_to_field(col: ColumnSchema):
47
50
  max_val=float(col.max) if col.max is not None else None,
48
51
  nullable=nullable,
49
52
  unique=unique,
53
+ **np_kwargs,
50
54
  )
51
55
 
52
56
  elif col.col_type == "bool":
53
- return pb.bool_field(nullable=nullable)
57
+ return pb.bool_field(nullable=nullable, **np_kwargs)
54
58
 
55
59
  elif col.col_type == "date":
56
60
  return pb.date_field(
@@ -58,6 +62,7 @@ def _col_to_field(col: ColumnSchema):
58
62
  max_date=str(col.max) if col.max is not None else None,
59
63
  nullable=nullable,
60
64
  unique=unique,
65
+ **np_kwargs,
61
66
  )
62
67
 
63
68
  elif col.col_type == "datetime":
@@ -66,15 +71,18 @@ def _col_to_field(col: ColumnSchema):
66
71
  max_date=str(col.max) if col.max is not None else None,
67
72
  nullable=nullable,
68
73
  unique=unique,
74
+ **np_kwargs,
69
75
  )
70
76
 
71
77
  elif col.col_type == "string":
72
78
  if col.values:
73
- return pb.string_field(allowed=col.values, nullable=nullable)
79
+ return pb.string_field(allowed=col.values, nullable=nullable, **np_kwargs)
80
+ elif col.pattern:
81
+ return pb.string_field(pattern=col.pattern, nullable=nullable, unique=unique, **np_kwargs)
74
82
  elif col.preset:
75
- return pb.string_field(preset=col.preset, nullable=nullable, unique=unique)
83
+ return pb.string_field(preset=col.preset, nullable=nullable, unique=unique, **np_kwargs)
76
84
  else:
77
- return pb.string_field(nullable=nullable, unique=unique)
85
+ return pb.string_field(nullable=nullable, unique=unique, **np_kwargs)
78
86
 
79
87
  else:
80
- return pb.string_field(nullable=nullable)
88
+ return pb.string_field(nullable=nullable, **np_kwargs)
@@ -9,6 +9,8 @@ import typer
9
9
  from rich import print as rprint
10
10
  from rich.console import Console
11
11
 
12
+ from fauxdata import __version__
13
+
12
14
  app = typer.Typer(
13
15
  name="fauxdata",
14
16
  help="Generate and validate fake datasets from YAML schemas.",
@@ -23,8 +25,20 @@ def _banner():
23
25
  rprint("[dim]Generate and validate realistic fake datasets[/dim]\n")
24
26
 
25
27
 
28
+ def _version_callback(value: bool):
29
+ if value:
30
+ rprint(f"fauxdata {__version__}")
31
+ raise typer.Exit()
32
+
33
+
26
34
  @app.callback(invoke_without_command=True)
27
- def main(ctx: typer.Context):
35
+ def main(
36
+ ctx: typer.Context,
37
+ version: Optional[bool] = typer.Option(
38
+ None, "--version", "-V", callback=_version_callback, is_eager=True,
39
+ help="Show version and exit.",
40
+ ),
41
+ ):
28
42
  if ctx.invoked_subcommand is None:
29
43
  _banner()
30
44
  rprint(ctx.get_help())
@@ -57,6 +57,8 @@ class ColumnSchema:
57
57
  locale: str | None = None
58
58
  precision: int | None = None
59
59
  values: list | None = None # for in_set
60
+ pattern: str | None = None # regex pattern for string generation
61
+ null_probability: float | None = None # e.g. 0.1 = 10% nulls
60
62
 
61
63
 
62
64
  @dataclass
@@ -142,6 +144,10 @@ def _parse_column(name: str, data: dict) -> ColumnSchema:
142
144
  if preset and preset not in STRING_PRESETS:
143
145
  raise ValueError(f"Column '{name}': unknown preset '{preset}'. Valid: {STRING_PRESETS}")
144
146
 
147
+ null_probability = data.get("null_probability", None)
148
+ if null_probability is not None and not (0.0 <= float(null_probability) <= 1.0):
149
+ raise ValueError(f"Column '{name}': null_probability must be between 0.0 and 1.0")
150
+
145
151
  return ColumnSchema(
146
152
  name=name,
147
153
  col_type=col_type,
@@ -153,6 +159,8 @@ def _parse_column(name: str, data: dict) -> ColumnSchema:
153
159
  locale=data.get("locale", None),
154
160
  precision=data.get("precision", None),
155
161
  values=data.get("values", None),
162
+ pattern=data.get("pattern", None),
163
+ null_probability=float(null_probability) if null_probability is not None else None,
156
164
  )
157
165
 
158
166
 
File without changes
@@ -0,0 +1,39 @@
1
+ """Shared fixtures for fauxdata tests."""
2
+
3
+ import pytest
4
+ import polars as pl
5
+
6
+ from fauxdata.schema import SchemaConfig, ColumnSchema, ValidationRule
7
+
8
+
9
+ @pytest.fixture
10
+ def minimal_schema():
11
+ """A minimal SchemaConfig with one int and one string column."""
12
+ return SchemaConfig(
13
+ name="test",
14
+ rows=10,
15
+ seed=42,
16
+ locale="US",
17
+ output_format="csv",
18
+ columns=[
19
+ ColumnSchema(name="id", col_type="int", min=1, max=100, unique=True),
20
+ ColumnSchema(name="name", col_type="string", preset="name"),
21
+ ],
22
+ )
23
+
24
+
25
+ @pytest.fixture
26
+ def simple_df():
27
+ """A small deterministic DataFrame for validation tests."""
28
+ return pl.DataFrame({
29
+ "id": [1, 2, 3],
30
+ "age": [25, 40, 55],
31
+ "email": ["a@b.com", "c@d.com", "e@f.com"],
32
+ })
33
+
34
+
35
+ @pytest.fixture
36
+ def people_schema_path():
37
+ """Path to the existing people.yml schema."""
38
+ from pathlib import Path
39
+ return str(Path(__file__).parent.parent / "schemas" / "people.yml")
@@ -0,0 +1,112 @@
1
+ """Smoke tests for the fauxdata CLI using typer's CliRunner."""
2
+
3
+ import textwrap
4
+ import pytest
5
+ from typer.testing import CliRunner
6
+
7
+ from fauxdata.main import app
8
+
9
+ runner = CliRunner()
10
+
11
+
12
+ def test_cli_no_args():
13
+ """Running fauxdata with no args should show help."""
14
+ result = runner.invoke(app, [])
15
+ assert result.exit_code == 0
16
+ assert "fauxdata" in result.output.lower() or "generate" in result.output.lower()
17
+
18
+
19
+ def test_cli_help():
20
+ result = runner.invoke(app, ["--help"])
21
+ assert result.exit_code == 0
22
+ assert "generate" in result.output
23
+
24
+
25
+ def test_cli_generate_help():
26
+ result = runner.invoke(app, ["generate", "--help"])
27
+ assert result.exit_code == 0
28
+ assert "--rows" in result.output
29
+ assert "--format" in result.output
30
+
31
+
32
+ def test_cli_generate_csv(tmp_path, people_schema_path):
33
+ out = tmp_path / "out.csv"
34
+ result = runner.invoke(app, ["generate", people_schema_path, "--rows", "5",
35
+ "--out", str(out), "--format", "csv", "--seed", "1"])
36
+ assert result.exit_code == 0, result.output
37
+ assert out.exists()
38
+
39
+
40
+ def test_cli_generate_json(tmp_path, people_schema_path):
41
+ out = tmp_path / "out.json"
42
+ result = runner.invoke(app, ["generate", people_schema_path, "--rows", "5",
43
+ "--out", str(out), "--format", "json", "--seed", "1"])
44
+ assert result.exit_code == 0, result.output
45
+ assert out.exists()
46
+
47
+
48
+ def test_cli_generate_parquet(tmp_path, people_schema_path):
49
+ out = tmp_path / "out.parquet"
50
+ result = runner.invoke(app, ["generate", people_schema_path, "--rows", "5",
51
+ "--out", str(out), "--format", "parquet", "--seed", "1"])
52
+ assert result.exit_code == 0, result.output
53
+ assert out.exists()
54
+
55
+
56
+ def test_cli_generate_stdout(people_schema_path, capsys):
57
+ result = runner.invoke(app, ["generate", people_schema_path, "--rows", "3",
58
+ "--out", "-", "--format", "csv", "--seed", "1"])
59
+ assert result.exit_code == 0, result.output
60
+
61
+
62
+ def test_cli_generate_with_validate(tmp_path, people_schema_path):
63
+ out = tmp_path / "out.csv"
64
+ result = runner.invoke(app, ["generate", people_schema_path, "--rows", "10",
65
+ "--out", str(out), "--format", "csv",
66
+ "--seed", "42", "--validate"])
67
+ assert result.exit_code == 0, result.output
68
+
69
+
70
+ def test_cli_generate_missing_schema(tmp_path):
71
+ result = runner.invoke(app, ["generate", "/nonexistent/schema.yml"])
72
+ assert result.exit_code != 0
73
+
74
+
75
+ def test_cli_validate(tmp_path, people_schema_path):
76
+ """Generate a file then validate it."""
77
+ out = tmp_path / "people.csv"
78
+ runner.invoke(app, ["generate", people_schema_path, "--rows", "10",
79
+ "--out", str(out), "--format", "csv", "--seed", "42"])
80
+ result = runner.invoke(app, ["validate", str(out), people_schema_path])
81
+ assert result.exit_code == 0, result.output
82
+
83
+
84
+ def test_cli_preview(tmp_path, people_schema_path):
85
+ out = tmp_path / "people.csv"
86
+ runner.invoke(app, ["generate", people_schema_path, "--rows", "20",
87
+ "--out", str(out), "--format", "csv", "--seed", "42"])
88
+ result = runner.invoke(app, ["preview", str(out), "--rows", "5"])
89
+ assert result.exit_code == 0, result.output
90
+
91
+
92
+ def test_cli_generate_inline_schema(tmp_path):
93
+ """Test with a minimal inline schema written to a tmp file."""
94
+ schema_yaml = textwrap.dedent("""\
95
+ name: mini
96
+ rows: 5
97
+ columns:
98
+ id:
99
+ type: int
100
+ min: 1
101
+ max: 100
102
+ label:
103
+ type: string
104
+ values: ["a", "b"]
105
+ """)
106
+ schema_path = tmp_path / "mini.yml"
107
+ schema_path.write_text(schema_yaml)
108
+ out = tmp_path / "mini.csv"
109
+ result = runner.invoke(app, ["generate", str(schema_path),
110
+ "--out", str(out), "--format", "csv", "--seed", "1"])
111
+ assert result.exit_code == 0, result.output
112
+ assert out.exists()