PyPI - TestDataX - Versions diffs - 0.2.0__tar.gz → 0.2.1__tar.gz - Mend

TestDataX 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

{testdatax-0.2.0 → testdatax-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: TestDataX
-Version: 0.2.0
+Version: 0.2.1
 Summary: A flexible test data generation toolkit
 License: MIT
 License-File: LICENSE
@@ -223,11 +223,10 @@ The schema file defines the structure and constraints of your generated data. Ea
 }
 ```
-> **Note:** `start_date`/`end_date` bound the generated range (inclusive). When
-> `format` is set, date/datetime values are rendered to a string with
-> `strftime`; for the SQL exporters this means the column receives a formatted
-> string literal rather than a native date, so `format` is best suited to the
-> CSV/JSON formats.
+> **Note:** `start_date`/`end_date` bound the generated range (inclusive).
+> `format` applies a `strftime` pattern to date/datetime values in the **CSV and
+> JSON** outputs only; the SQL, Parquet and ORC exporters keep native date types
+> and ignore `format`.
 #### Enum Fields
 ```json

{testdatax-0.2.0 → testdatax-0.2.1}/README.md RENAMED Viewed

@@ -200,11 +200,10 @@ The schema file defines the structure and constraints of your generated data. Ea
 }
 ```
-> **Note:** `start_date`/`end_date` bound the generated range (inclusive). When
-> `format` is set, date/datetime values are rendered to a string with
-> `strftime`; for the SQL exporters this means the column receives a formatted
-> string literal rather than a native date, so `format` is best suited to the
-> CSV/JSON formats.
+> **Note:** `start_date`/`end_date` bound the generated range (inclusive).
+> `format` applies a `strftime` pattern to date/datetime values in the **CSV and
+> JSON** outputs only; the SQL, Parquet and ORC exporters keep native date types
+> and ignore `format`.
 #### Enum Fields
 ```json

{testdatax-0.2.0 → testdatax-0.2.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "TestDataX"
-version = "0.2.0"
+version = "0.2.1"
 description = "A flexible test data generation toolkit"
 authors = ["JamesPBrett"]
 license = "MIT"
@@ -225,6 +225,7 @@ allowed_tags = [
     "refactor", # Code changes without fixing bugs or adding features
 ]
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

{testdatax-0.2.0 → testdatax-0.2.1}/src/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 """TestDataX package initialization."""
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 from src.cli import app  # noqa

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/csv_exporter.py RENAMED Viewed

@@ -7,7 +7,7 @@ import pandas as pd
 from .base_exporter import BaseExporter
 from .utils.chunker import DataChunker
 from .utils.constants import CHUNK_SIZE_CSV
-from .utils.formatters import CSVFormatter
+from .utils.formatters import CSVFormatter, extract_formats
 logger = logging.getLogger(__name__)
@@ -76,9 +76,12 @@ class CsvExporter(BaseExporter):
             else:
                 fieldnames = list(data[0].keys())
+            formats = extract_formats(schema)
             first_chunk = True
             for chunk in self.chunker.chunk_data(data):
-                formatted_chunk = [self.formatter.format_row(row) for row in chunk]
+                formatted_chunk = [
+                    self.formatter.format_row(row, formats) for row in chunk
+                ]
                 df = pd.DataFrame(formatted_chunk, columns=fieldnames)
                 # Write the data to CSV in chunks

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/json_exporter.py RENAMED Viewed

@@ -5,7 +5,7 @@ from typing import Any
 from .base_exporter import BaseExporter
 from .utils.chunker import DataChunker
 from .utils.constants import CHUNK_SIZE_JSON
-from .utils.formatters import JSONFormatter
+from .utils.formatters import JSONFormatter, extract_formats
 logger = logging.getLogger(__name__)
@@ -65,12 +65,13 @@ class JsonExporter(BaseExporter):
             # Stream a valid JSON array to disk one chunk at a time so the whole
             # dataset is never held in memory at once.
+            formats = extract_formats(schema)
             with open(output_path, "w", encoding="utf-8") as f:
                 f.write("[")
                 first = True
                 for chunk in self.chunker.chunk_data(data):
                     for row in chunk:
-                        formatted = self.formatter.format_row(row)
+                        formatted = self.formatter.format_row(row, formats)
                         block = json.dumps(formatted, indent=4)
                         indented = "\n".join(
                             "    " + line for line in block.splitlines()

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/mssql_exporter.py RENAMED Viewed

@@ -6,7 +6,7 @@ from uuid import UUID
 from .base_exporter import BaseExporter
 from .utils.constants import DEFAULT_SCHEMA
-from .utils.sql import escape_ansi_quotes
+from .utils.sql import escape_ansi_quotes, quote_mssql_ident
 MSSQL_TYPE_MAPPING = {
     "string": "NVARCHAR(255)",  # Unicode string support
@@ -32,6 +32,11 @@ class MssqlExporter(BaseExporter):
         """Escape a string for a T-SQL single-quoted literal (quote doubling)."""
         return escape_ansi_quotes(value)
+    @staticmethod
+    def _quote_ident(name: str) -> str:
+        """Quote a T-SQL identifier (table or column name) with brackets."""
+        return quote_mssql_ident(name)
     def _format_value(
         self,
         value: (
@@ -149,13 +154,15 @@ class MssqlExporter(BaseExporter):
                 and "values" in field_def
             ):
                 values = "','".join(self._escape(v) for v in field_def["values"])
-                check_constraints.append(f"CHECK ({field_name} IN ('{values}'))")
+                check_constraints.append(
+                    f"CHECK ({self._quote_ident(field_name)} IN ('{values}'))"
+                )
-            columns.append(f"    {field_name} {sql_type} NULL")
+            columns.append(f"    {self._quote_ident(field_name)} {sql_type} NULL")
         # Combine columns and check constraints
         return (
-            f"CREATE TABLE {table_name} (\n"
+            f"CREATE TABLE {self._quote_ident(table_name)} (\n"
             + ",\n".join(columns)
             + (
                 (",\n    " + ",\n    ".join(check_constraints))
@@ -184,9 +191,10 @@ class MssqlExporter(BaseExporter):
             'INSERT INTO users (id, name) VALUES (1, "test");'
         """
-        columns = ", ".join(row.keys())
+        columns = ", ".join(self._quote_ident(c) for c in row.keys())
         values = ", ".join(self._format_value(v) for v in row.values())
-        return f"INSERT INTO {table_name} ({columns}) VALUES ({values});"
+        table = self._quote_ident(table_name)
+        return f"INSERT INTO {table} ({columns}) VALUES ({values});"
     def export(
         self, data: list[dict[str, Any]], output_path: str, schema: dict | None = None

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/mysql_exporter.py RENAMED Viewed

@@ -6,7 +6,7 @@ from uuid import UUID
 from .base_exporter import BaseExporter
 from .utils.constants import DEFAULT_SCHEMA
-from .utils.sql import escape_mysql_literal
+from .utils.sql import escape_mysql_literal, quote_mysql_ident
 MYSQL_TYPE_MAPPING = {
     "string": "VARCHAR(255)",
@@ -32,6 +32,11 @@ class MysqlExporter(BaseExporter):
         """Escape a string for a MySQL single-quoted literal."""
         return escape_mysql_literal(value)
+    @staticmethod
+    def _quote_ident(name: str) -> str:
+        """Quote a MySQL identifier (table or column name)."""
+        return quote_mysql_ident(name)
     def _format_value(
         self,
         value: (
@@ -147,9 +152,13 @@ class MysqlExporter(BaseExporter):
             ):
                 values = "','".join(self._escape(v) for v in field_def["values"])
                 sql_type = f"ENUM('{values}')"
-            columns.append(f"    {field_name} {sql_type} NULL")
+            columns.append(f"    {self._quote_ident(field_name)} {sql_type} NULL")
-        return f"CREATE TABLE {table_name} (\n" + ",\n".join(columns) + "\n);\n\n"
+        return (
+            f"CREATE TABLE {self._quote_ident(table_name)} (\n"
+            + ",\n".join(columns)
+            + "\n);\n\n"
+        )
     def _create_insert_stmt(
         self, row: dict[str, Any], table_name: str = "output"
@@ -170,9 +179,10 @@ class MysqlExporter(BaseExporter):
             'INSERT INTO users (id, name) VALUES (1, "test");'
         """
-        columns = ", ".join(row.keys())
+        columns = ", ".join(self._quote_ident(c) for c in row.keys())
         values = ", ".join(self._format_value(v) for v in row.values())
-        return f"INSERT INTO {table_name} ({columns}) VALUES ({values});"
+        table = self._quote_ident(table_name)
+        return f"INSERT INTO {table} ({columns}) VALUES ({values});"
     def export(
         self, data: list[dict[str, Any]], output_path: str, schema: dict | None = None

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/oracle_exporter.py RENAMED Viewed

@@ -6,7 +6,7 @@ from uuid import UUID
 from .base_exporter import BaseExporter
 from .utils.constants import DEFAULT_SCHEMA
-from .utils.sql import escape_ansi_quotes
+from .utils.sql import escape_ansi_quotes, quote_ansi_ident
 ORACLE_TYPE_MAPPING = {
     "string": "VARCHAR2(255)",  # Oracle's Unicode string type
@@ -32,6 +32,11 @@ class OracleExporter(BaseExporter):
         """Escape a string for an Oracle single-quoted literal (quote doubling)."""
         return escape_ansi_quotes(value)
+    @staticmethod
+    def _quote_ident(name: str) -> str:
+        """Quote an Oracle identifier (table or column name) with double quotes."""
+        return quote_ansi_ident(name)
     def _format_value(
         self,
         value: (
@@ -156,13 +161,15 @@ class OracleExporter(BaseExporter):
                 and "values" in field_def
             ):
                 values = "','".join(self._escape(v) for v in field_def["values"])
-                check_constraints.append(f"CHECK ({field_name} IN ('{values}'))")
+                check_constraints.append(
+                    f"CHECK ({self._quote_ident(field_name)} IN ('{values}'))"
+                )
-            columns.append(f"    {field_name} {sql_type} NULL")
+            columns.append(f"    {self._quote_ident(field_name)} {sql_type} NULL")
         # Combine columns and check constraints
         return (
-            f"CREATE TABLE {table_name} (\n"
+            f"CREATE TABLE {self._quote_ident(table_name)} (\n"
             + ",\n".join(columns)
             + (
                 (",\n    " + ",\n    ".join(check_constraints))
@@ -191,9 +198,10 @@ class OracleExporter(BaseExporter):
             'INSERT INTO users (id, name) VALUES (1, "test");'
         """
-        columns = ", ".join(row.keys())
+        columns = ", ".join(self._quote_ident(c) for c in row.keys())
         values = ", ".join(self._format_value(v) for v in row.values())
-        return f"INSERT INTO {table_name} ({columns}) VALUES ({values});"
+        table = self._quote_ident(table_name)
+        return f"INSERT INTO {table} ({columns}) VALUES ({values});"
     def export(
         self, data: list[dict[str, Any]], output_path: str, schema: dict | None = None

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/utils/formatters.py RENAMED Viewed

@@ -6,6 +6,20 @@ from decimal import Decimal
 from typing import Any
+def extract_formats(schema: dict | None) -> dict[str, str]:
+    """Extract per-column ``strftime`` formats from a schema definition.
+    Only complex (dict) field definitions carrying a ``format`` key are included.
+    """
+    if not schema:
+        return {}
+    return {
+        name: field_def["format"]
+        for name, field_def in schema.items()
+        if isinstance(field_def, dict) and field_def.get("format")
+    }
 class BaseFormatter:
     """Base class for handling data type formatting across exporters."""
@@ -68,22 +82,28 @@ class BaseFormatter:
         pass
     def format_row(
-        self, row: dict[str, Any], **kwargs: dict[str, str | int | float]
+        self, row: dict[str, Any], formats: dict[str, str] | None = None
     ) -> dict[str, Any]:
-        """Format the provided rows with the correct format_value.
+        """Format the provided row with the correct format_value.
         Args:
             row: Dictionary containing row data
-            **kwargs: Additional format-specific parameters
+            formats: Optional per-column ``strftime`` formats applied to
+                date/datetime values before normal value formatting.
         Returns:
             Formatted row dictionary
         """
+        formats = formats or {}
         formatted_row: dict[str, Any] = {}
         for key, value in row.items():
             try:
-                formatted_row[key] = self.format_value(value)
+                fmt = formats.get(key)
+                if fmt and isinstance(value, (date | datetime)):
+                    formatted_row[key] = value.strftime(fmt)
+                else:
+                    formatted_row[key] = self.format_value(value)
             except Exception as e:
                 formatted_row[key] = f"ERROR: {str(e)}"
         return formatted_row

testdatax-0.2.1/src/exporters/utils/sql.py ADDED Viewed

@@ -0,0 +1,38 @@
+"""Shared SQL string-escaping helpers for the SQL exporters."""
+def escape_ansi_quotes(value: str) -> str:
+    """Escape an ANSI single-quoted SQL literal by doubling embedded quotes.
+    Used by dialects without backslash escaping (MSSQL, Oracle).
+    """
+    return value.replace("'", "''")
+def escape_mysql_literal(value: str) -> str:
+    """Escape a MySQL single-quoted literal.
+    The backslash is escaped first so a value containing a backslash cannot
+    terminate the literal early, then single quotes and newlines are escaped.
+    """
+    return value.replace("\\", "\\\\").replace("'", "\\'").replace("\n", "\\n")
+def quote_mysql_ident(name: str) -> str:
+    """Quote a MySQL identifier with backticks, doubling embedded backticks."""
+    return "`" + name.replace("`", "``") + "`"
+def quote_mssql_ident(name: str) -> str:
+    """Quote a T-SQL identifier with brackets, doubling embedded ``]``."""
+    return "[" + name.replace("]", "]]") + "]"
+def quote_ansi_ident(name: str) -> str:
+    """Quote an ANSI/Oracle identifier with double quotes, doubling embedded ``"``.
+    Note: Oracle treats a double-quoted identifier as case-sensitive, so the
+    generated DDL and DML deliberately reference every identifier quoted to stay
+    self-consistent.
+    """
+    return '"' + name.replace('"', '""') + '"'

{testdatax-0.2.0 → testdatax-0.2.1}/src/generator.py RENAMED Viewed

@@ -83,7 +83,11 @@ class DataGenerator:
     def _generate_field_value(
         self, field: FieldSchema, unique_seen: dict[str, set[GeneratedValue]]
     ) -> GeneratedValue:
-        """Produce a single value, honoring nullable/unique/format options."""
+        """Produce a single value, honoring the nullable and unique options.
+        Native (typed) values are returned; date ``format`` rendering is handled
+        by the file-format exporters so SQL/Parquet/ORC keep native date types.
+        """
         if field.unique:
             seen = unique_seen[field.name]
             for _ in range(MAX_UNIQUE_RETRIES):
@@ -91,12 +95,12 @@ class DataGenerator:
                 if value is None or value not in seen:
                     if value is not None:
                         seen.add(value)
-                    return self._apply_format(field, value)
+                    return value
             raise ValueError(
                 f"Could not generate a unique value for field '{field.name}' "
                 f"after {MAX_UNIQUE_RETRIES} attempts"
             )
-        return self._apply_format(field, self._produce_value(field))
+        return self._produce_value(field)
     def _produce_value(self, field: FieldSchema) -> GeneratedValue:
         """Generate a raw value for a field, applying only the nullable option."""
@@ -104,17 +108,6 @@ class DataGenerator:
             return None
         return self.type_generators[field.type](field)
-    @staticmethod
-    def _apply_format(field: FieldSchema, value: GeneratedValue) -> GeneratedValue:
-        """Render date/datetime values via strftime when a format is set.
-        Applied after the uniqueness check so uniqueness is tracked on the raw
-        (high-cardinality) value rather than the formatted string.
-        """
-        if field.format and isinstance(value, (date | datetime)):
-            return value.strftime(field.format)
-        return value
     def _generate_string(self, field: FieldSchema) -> str:
         provider_value = str(field.value_provider) if field.value_provider else "name"
         return self.provider.generate_string(

testdatax-0.2.0/src/exporters/utils/sql.py DELETED Viewed

@@ -1,18 +0,0 @@
-"""Shared SQL string-escaping helpers for the SQL exporters."""
-def escape_ansi_quotes(value: str) -> str:
-    """Escape an ANSI single-quoted SQL literal by doubling embedded quotes.
-    Used by dialects without backslash escaping (MSSQL, Oracle).
-    """
-    return value.replace("'", "''")
-def escape_mysql_literal(value: str) -> str:
-    """Escape a MySQL single-quoted literal.
-    The backslash is escaped first so a value containing a backslash cannot
-    terminate the literal early, then single quotes and newlines are escaped.
-    """
-    return value.replace("\\", "\\\\").replace("'", "\\'").replace("\n", "\\n")

{testdatax-0.2.0 → testdatax-0.2.1}/LICENSE RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/cli.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/__init__.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/base_exporter.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/orc_exporter.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/parquet_exporter.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/utils/__init__.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/utils/chunker.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/utils/constants.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/exporters/utils/exporter_config.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/providers/__init__.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/providers/base.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/providers/faker_provider.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/providers/mimesis_provider.py RENAMED Viewed

File without changes

{testdatax-0.2.0 → testdatax-0.2.1}/src/schemas.py RENAMED Viewed

File without changes

TestDataX 0.2.0__tar.gz → 0.2.1__tar.gz

TestDataX 0.2.0__tar.gz → 0.2.1__tar.gz