diffprep 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diffprep/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger(__name__)
4
+ logger.debug("Logging is ready")
diffprep/cli/app.py ADDED
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ import typer
4
+
5
+ from diffprep.cli.commands import init as init_module
6
+ from diffprep.cli.commands.json import json_command
7
+ from diffprep.cli.commands.xml import xml_command
8
+
9
# Root Typer application; the help string is what ``--help`` prints.
app = typer.Typer(help="Command-line JSON and XML diff preprocessor.")

# Register the normalizer callables under explicit command names instead of
# their function names (which carry a ``_command`` suffix).
app.command("json")(json_command)
app.command("xml")(xml_command)
# ``init`` is a separate Typer app mounted as a sub-command group.
app.add_typer(init_module.app, name="init")


if __name__ == "__main__":
    app()
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ import typer
6
+
7
+ from diffprep.cli.io import write_stdout_bytes
8
+ from diffprep.cli.prompting import build_llm_prompt, build_prompt_context
9
+ from diffprep.core import Settings
10
+ from diffprep.processors import get_processor
11
+
12
+
13
def read_input_bytes(path: Path) -> bytes:
    """Return the raw bytes stored in the file at ``path``.

    Raises:
        typer.BadParameter: If reading fails with any ``OSError``; the
            original error is chained as the cause.
    """
    try:
        with path.open("rb") as handle:
            return handle.read()
    except OSError as error:
        raise typer.BadParameter(f"Could not read input file: {path}") from error
18
+
19
+
20
def run_normalizer(settings: Settings, path: Path) -> None:
    """Normalize the file at ``path`` and write the result to stdout.

    The processor is looked up from ``settings`` with ``get_processor``. When
    the CLI options carry a question, an LLM prompt built from the input, the
    normalized output and the settings is written (UTF-8 encoded) instead of
    the normalized bytes. Nothing happens when ``settings.cli_options`` is
    falsy.
    """
    options = settings.cli_options
    if not options:
        return

    raw = read_input_bytes(path)
    normalize = get_processor(settings)
    normalized = normalize(raw, settings)

    if not options.question:
        write_stdout_bytes(normalized)
        return

    prompt = build_llm_prompt(
        build_prompt_context(
            question=options.question,
            input_type=options.input_type,
            original_input=raw,
            normalized_output=normalized,
            settings=settings,
        )
    )
    write_stdout_bytes(prompt.encode("utf-8"))
@@ -0,0 +1,141 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Final, Literal
6
+
7
+ import typer
8
+
9
+ TOOL_NAME: Final[str] = "diffprep"
10
+ BEGIN_MARKER: Final[str] = "# BEGIN diffprep"
11
+ END_MARKER: Final[str] = "# END diffprep"
12
+
13
+ Status = Literal["created", "updated", "unchanged"]
14
+
15
+ app = typer.Typer()
16
+
17
+
18
@dataclass(frozen=True, slots=True)
class DiffDriver:
    """Pairing of a file pattern with the diff driver name used for it."""

    # Pattern written on the left-hand side of a ``.gitattributes`` rule.
    pattern: str
    # Name used both as the ``diff=<name>`` attribute value and as the
    # sub-command in the generated ``textconv`` line.
    command_name: str
22
+
23
+
24
+ DEFAULT_DRIVERS: Final[tuple[DiffDriver, ...]] = (
25
+ DiffDriver("*.json", "json"),
26
+ DiffDriver("*.xml", "xml"),
27
+ )
28
+
29
+
30
def render_gitattributes() -> str:
    """Render the managed ``.gitattributes`` block.

    Produces one ``<pattern> diff=<command_name>`` line per entry of
    ``DEFAULT_DRIVERS``, wrapped in the begin/end markers and terminated by a
    newline.
    """
    rules = [
        f"{driver.pattern} diff={driver.command_name}" for driver in DEFAULT_DRIVERS
    ]
    return "\n".join([BEGIN_MARKER, *rules, END_MARKER]) + "\n"
37
+
38
+
39
def render_gitconfig_repo() -> str:
    """Render the repo-local git config holding one ``[diff]`` section per driver.

    Each section maps the driver name to a ``textconv`` command that invokes
    this tool. Sections are separated by one blank line and the whole text is
    wrapped in the begin/end markers.
    """
    sections = [
        "\n".join(
            (
                f'[diff "{driver.command_name}"]',
                f" textconv = {TOOL_NAME} {driver.command_name}",
            )
        )
        for driver in DEFAULT_DRIVERS
    ]

    pieces = [BEGIN_MARKER]
    if sections:
        # A blank line between sections, none after the last one.
        pieces.append("\n\n".join(sections))
    pieces.append(END_MARKER)
    return "\n".join(pieces) + "\n"
56
+
57
+
58
def render_unix_wrapper() -> str:
    """Return the text of the ``sh`` wrapper script.

    The script resolves the repository root with ``git rev-parse`` and runs
    ``git diff`` with ``.gitconfig.repo`` supplied through ``include.path``,
    forwarding all arguments.
    """
    script_lines = (
        "#!/usr/bin/env sh",
        "set -eu",
        "",
        'repo_root="$(git rev-parse --show-toplevel)"',
        'git -c include.path="$repo_root/.gitconfig.repo" diff "$@"',
    )
    return "\n".join(script_lines) + "\n"
65
+
66
+
67
def replace_or_append_managed_block(existing: str, managed_block: str) -> str:
    """Insert ``managed_block`` into ``existing``, replacing a previous block.

    A previous block is the span from the first ``BEGIN_MARKER`` to the first
    ``END_MARKER`` that follows it. When such a span exists it is replaced;
    otherwise the block is appended after the existing content, separated by
    one blank line. Empty (or whitespace-only) ``existing`` yields the block
    itself.

    The result is idempotent: feeding the output back in with the same
    ``managed_block`` returns it unchanged.

    Args:
        existing: Current file content (may be empty).
        managed_block: Full block text including both markers.

    Returns:
        The new file content.
    """
    begin_index = existing.find(BEGIN_MARKER)
    # Look for the END marker only *after* the BEGIN marker; a stray END line
    # earlier in the file must not prevent the real block from being found.
    end_index = (
        existing.find(END_MARKER, begin_index + len(BEGIN_MARKER))
        if begin_index != -1
        else -1
    )

    if begin_index != -1 and end_index != -1:
        before = existing[:begin_index].rstrip()
        # Strip both sides: keeping the trailing newline here and then adding
        # another one below made the file grow by one newline on every run.
        after = existing[end_index + len(END_MARKER) :].strip()

        parts: list[str] = []
        if before:
            parts.append(before)
        parts.append(managed_block.rstrip())
        if after:
            parts.append(after)
        return "\n\n".join(parts) + "\n"

    existing_clean = existing.rstrip()
    if not existing_clean:
        return managed_block

    return f"{existing_clean}\n\n{managed_block}"
88
+
89
+
90
def write_text_file(path: Path, content: str) -> Status:
    """Write ``content`` to ``path`` as UTF-8 unless it is already there.

    Returns:
        ``"unchanged"`` when the file already holds exactly ``content``,
        ``"created"`` when the file did not exist before, ``"updated"``
        otherwise.
    """
    already_present = path.exists()
    if already_present and path.read_text(encoding="utf-8") == content:
        return "unchanged"

    path.write_text(content, encoding="utf-8")
    return "updated" if already_present else "created"
97
+
98
+
99
def write_managed_block_file(path: Path, managed_block: str) -> Status:
    """Merge ``managed_block`` into the file at ``path`` and report the outcome.

    The current content (empty when the file is missing) is combined with the
    block by ``replace_or_append_managed_block`` and written back as UTF-8
    only when the result differs.

    Returns:
        ``"unchanged"`` when nothing had to be written, ``"created"`` when
        the file did not exist before, ``"updated"`` otherwise.
    """
    # Decide created-vs-updated from the file's existence, not from whether
    # its content was empty: an existing empty file is updated, not created.
    existed = path.exists()
    existing = path.read_text(encoding="utf-8") if existed else ""
    new_content = replace_or_append_managed_block(existing, managed_block)

    if existing == new_content:
        return "unchanged"

    path.write_text(new_content, encoding="utf-8")
    return "updated" if existed else "created"
108
+
109
+
110
def make_executable(path: Path) -> None:
    """Add the execute bits for user, group and others to ``path``'s mode."""
    current_mode = path.stat().st_mode
    path.chmod(current_mode | 0o111)
112
+
113
+
114
def run_init() -> None:
    """Create or refresh the git integration files in the current directory.

    Writes the managed block into ``.gitattributes``, writes
    ``.gitconfig.repo`` and the wrapper script named after the tool, marks the
    wrapper executable, then prints one status line per file followed by a
    usage hint.
    """
    attributes_file = Path(".gitattributes")
    config_file = Path(".gitconfig.repo")
    wrapper_file = Path(TOOL_NAME)

    report = [
        (write_managed_block_file(attributes_file, render_gitattributes()), attributes_file),
        (write_text_file(config_file, render_gitconfig_repo()), config_file),
        (write_text_file(wrapper_file, render_unix_wrapper()), wrapper_file),
    ]
    make_executable(wrapper_file)

    for status, target in report:
        typer.echo(f"{status:9} {target.name}")
    typer.echo()
    typer.echo("Run:")
    typer.echo(f" ./{TOOL_NAME}")
133
+
134
+
135
# Registered as the Typer callback with ``invoke_without_command=True`` so the
# initializer runs when this app is invoked without a sub-command.
@app.callback(invoke_without_command=True)
def main() -> None:
    run_init()
138
+
139
+
140
+ if __name__ == "__main__":
141
+ app()
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from pathlib import Path
5
+ from typing import Annotated
6
+
7
+ import typer
8
+
9
+ from diffprep.cli.commands._normalize import run_normalizer
10
+ from diffprep.cli.models import JsonCliOptions
11
+ from diffprep.core import init_settings
12
+ from diffprep.types import InputType
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def json_command(
    path: Annotated[Path, typer.Argument(exists=True, readable=True)],
    question: Annotated[str | None, typer.Option("--question", "-q")] = None,
    drop_keys: Annotated[
        list[str] | None,
        typer.Option(
            "--drop-key",
            "-d",
            help="Remove one or more JSON keys before normalization.",
        ),
    ] = None,
) -> None:
    """Normalize a JSON file for diffing and write the result to stdout."""
    # The docstring is kept to one line on purpose: Typer shows it as the
    # command's help text.
    cli_options = JsonCliOptions(
        input_type=InputType.JSON,
        question=question,
        drop_keys=set(drop_keys or ()),
    )
    settings = init_settings(cli_options)
    # Log through this module's logger rather than the root ``logging``
    # module, matching how the rest of the package emits debug output.
    logger.debug(settings)

    run_normalizer(settings, path)
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Annotated
5
+
6
+ import typer
7
+
8
+ from diffprep.cli.commands._normalize import run_normalizer
9
+ from diffprep.cli.models import XmlCliOptions
10
+ from diffprep.core import init_settings
11
+ from diffprep.types import InputType
12
+
13
+
14
def xml_command(
    path: Annotated[Path, typer.Argument(exists=True, readable=True)],
    question: Annotated[str | None, typer.Option("--question", "-q")] = None,
) -> None:
    # Documented with comments rather than a docstring so the help text that
    # Typer derives from the docstring stays as it is.
    # Builds the XML CLI options, resolves the settings and runs the
    # normalizer on ``path``.
    options = XmlCliOptions(
        input_type=InputType.XML,
        question=question,
    )
    run_normalizer(init_settings(options), path)
@@ -0,0 +1,36 @@
1
+ from collections.abc import Callable
2
+ from enum import StrEnum
3
+ from typing import Annotated, get_args, get_origin
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
+ def enum_values_for_field(
9
+ settings_cls: type[BaseModel],
10
+ field_name: str,
11
+ ) -> list[str]:
12
+ annotation = settings_cls.model_fields[field_name].annotation
13
+ origin = get_origin(annotation)
14
+ if origin is Annotated:
15
+ annotation = get_args(annotation)[0]
16
+ if isinstance(annotation, type) and issubclass(annotation, StrEnum):
17
+ return [member.value for member in annotation]
18
+ return []
19
+
20
+
21
def description_for_field(
    settings_cls: type[BaseModel],
    field_name: str,
) -> str:
    """Return the description declared for a model field, or ``""`` if none."""
    description = settings_cls.model_fields[field_name].description
    return description if description else ""
27
+
28
+
29
def enum_field_completion(
    settings_cls: type[BaseModel],
    field_name: str,
) -> Callable[[str], list[str]]:
    """Build a completion callback for an enum-typed model field.

    The enum values are looked up once, when the callback is created. The
    callback returns the values that start with the text typed so far.
    """
    candidates = enum_values_for_field(settings_cls, field_name)

    def complete(incomplete: str) -> list[str]:
        return [candidate for candidate in candidates if candidate.startswith(incomplete)]

    return complete
diffprep/cli/io.py ADDED
@@ -0,0 +1,12 @@
1
+ import sys
2
+
3
+
4
def read_stdin_bytes() -> bytes:
    """Read all of standard input as bytes.

    Raises:
        ValueError: If standard input yields no data.
    """
    payload = sys.stdin.buffer.read()
    if payload:
        return payload
    raise ValueError("No input received on stdin.")
9
+
10
+
11
def write_stdout_bytes(data: bytes) -> None:
    """Write ``data`` to standard output through its binary buffer."""
    binary_stdout = sys.stdout.buffer
    binary_stdout.write(data)
diffprep/cli/models.py ADDED
@@ -0,0 +1,61 @@
1
+ from typing import Literal, override
2
+
3
+ from pydantic import Field
4
+
5
+ from diffprep.core import JsonSettings, Settings, XmlSettings
6
+ from diffprep.core.configs import CliOptions
7
+ from diffprep.types import InputType
8
+
9
+
10
class JsonCliOptions(CliOptions, JsonSettings):
    """CLI options of the ``json`` command, applied on top of ``Settings``.

    Only options that were explicitly supplied override the JSON settings
    resolved from the other sources; option defaults leave them untouched.
    """

    input_type: Literal[InputType.JSON] = InputType.JSON

    drop_keys: set[str] = Field(
        default_factory=set,
        description="JSON object keys to remove recursively before output.",
    )

    def to_settings(self) -> JsonSettings:
        """Return a ``JsonSettings`` carrying the explicitly supplied options.

        Fields not supplied on the command line are left unset, so they keep
        their defaults and are skipped by ``model_dump(exclude_unset=True)``.
        Previously every field was passed explicitly, which made
        ``exclude_unset`` a no-op and let CLI defaults overwrite configured
        values.
        """
        overrides = self.model_dump(
            include=set(JsonSettings.model_fields),
            exclude_unset=True,
        )
        # An empty key set carries no request from the user, so it is not
        # treated as an override.
        if not overrides.get("drop_keys"):
            overrides.pop("drop_keys", None)
        # Compact output has no indentation.
        if overrides.get("style") == "compact":
            overrides["indent"] = None
        return JsonSettings(**overrides)

    @override
    def apply_to(self, settings: Settings) -> Settings:
        """Return a copy of ``settings`` with these JSON overrides applied."""
        json_update = self.to_settings().model_dump(exclude_unset=True)

        return settings.model_copy(
            update={
                "json_settings": settings.json_settings.model_copy(update=json_update),
            }
        )
36
+
37
+
38
class XmlCliOptions(CliOptions, XmlSettings):
    """CLI options of the ``xml`` command, applied on top of ``Settings``.

    Only options that were explicitly supplied override the XML settings
    resolved from the other sources.
    """

    input_type: Literal[InputType.XML] = InputType.XML

    drop_attrs: set[str] = Field(
        default_factory=set,
        description="XML attributes to remove recursively before output.",
    )
    drop_tags: set[str] = Field(
        default_factory=set,
        description="XML tags to remove recursively before output.",
    )

    def to_settings(self) -> XmlSettings:
        """Return an ``XmlSettings`` carrying the explicitly supplied options.

        Previously this returned a default ``XmlSettings()`` and ignored the
        option values altogether.
        """
        overrides = self.model_dump(
            include=set(XmlSettings.model_fields),
            exclude_unset=True,
        )
        return XmlSettings(**overrides)

    @override
    def apply_to(self, settings: Settings) -> Settings:
        """Return a copy of ``settings`` with these XML overrides applied."""
        xml_update = self.to_settings().model_dump(exclude_unset=True)

        # The overrides belong to ``xml_settings``; they used to be written
        # into ``json_settings``.
        return settings.model_copy(
            update={
                "xml_settings": settings.xml_settings.model_copy(update=xml_update),
            }
        )
@@ -0,0 +1,134 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import BaseModel, ConfigDict, Field
4
+
5
+ from diffprep.core.configs import Settings
6
+ from diffprep.types import InputType
7
+
8
+ DEFAULT_MAX_PROMPT_CHARS = 4_000
9
+
10
+
11
def render_settings_help(settings: BaseModel, prefix: str = "") -> str:
    """Render one help line per leaf field of ``settings``.

    Nested models are rendered recursively with a dotted path. Each leaf line
    has the form ``<path> = <repr(value)> (<env name>) # <description>``,
    where the env name is ``DIFFPREP_`` followed by the path with dots
    replaced by double underscores.

    Args:
        settings: Model instance whose current values are shown.
        prefix: Dotted path of ``settings`` within its parent, if any.

    Returns:
        The rendered lines joined with newlines.
    """
    lines: list[str] = []

    # Read the field definitions from the class: accessing ``model_fields``
    # on an instance is deprecated in recent Pydantic releases.
    for name, field in type(settings).model_fields.items():
        value = getattr(settings, name)
        path = f"{prefix}.{name}" if prefix else name

        if isinstance(value, BaseModel):
            lines.append(render_settings_help(value, path))
        else:
            env = f"DIFFPREP_{path.replace('.', '__')}"
            desc = field.description or ""
            lines.append(f"{path} = {value!r} ({env}) # {desc}")

    return "\n".join(lines)
26
+
27
+
28
class PromptContext(BaseModel):
    """Structured context passed to the LLM prompt renderer.

    Instances are frozen and reject unknown fields.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # The user's question; must contain at least one character.
    question: str = Field(min_length=1)
    input_type: InputType
    # Text forms of the input and of the normalized output.
    original_input: str
    normalized_output: str
    settings: Settings

    @property
    def current_command(self) -> str:
        """Return ``diffprep`` followed by the string form of the CLI options.

        NOTE(review): this interpolates the options model itself, not a shell
        command line (the input path is not part of it) — confirm whether a
        reproducible command was intended.
        """
        return f"diffprep {self.settings.cli_options}"
43
+
44
+
45
def decode_bytes(data: bytes, /) -> str:
    """Return ``data`` decoded as UTF-8; undecodable bytes become U+FFFD."""
    return str(data, "utf-8", "replace")
48
+
49
+
50
def truncate_text(text: str, /, *, max_chars: int = DEFAULT_MAX_PROMPT_CHARS) -> str:
    """Cut ``text`` to ``max_chars`` characters and append a truncation notice.

    Text that already fits is returned unchanged. Otherwise the result is the
    first ``max_chars`` characters, a blank line, and a bracketed notice with
    the shown, total and omitted character counts.
    """
    total = len(text)
    if total <= max_chars:
        return text

    head = text[:max_chars]
    omitted = total - max_chars
    notice = (
        f"[truncated: showing first {max_chars} of {total} characters; "
        f"{omitted} omitted]"
    )
    return f"{head}\n\n{notice}"
61
+
62
+
63
def build_prompt_context(
    *,
    question: str,
    input_type: InputType,
    original_input: bytes,
    normalized_output: bytes,
    settings: Settings,
    max_chars: int = DEFAULT_MAX_PROMPT_CHARS,
) -> PromptContext:
    """Assemble the ``PromptContext`` used to render the LLM prompt.

    The question is stripped of surrounding whitespace. Both byte payloads
    are decoded with ``decode_bytes`` and shortened with ``truncate_text`` to
    at most ``max_chars`` characters each (plus the truncation notice).
    """

    def as_prompt_text(payload: bytes) -> str:
        return truncate_text(decode_bytes(payload), max_chars=max_chars)

    return PromptContext(
        question=question.strip(),
        input_type=input_type,
        original_input=as_prompt_text(original_input),
        normalized_output=as_prompt_text(normalized_output),
        settings=settings,
    )
86
+
87
+
88
def build_llm_prompt(context: PromptContext) -> str:
    """Render the final LLM prompt string.

    The text starts with the user's question, followed by a section between
    ``--- PROMPT START ---`` and ``--- PROMPT END ---`` that contains the
    question again, the current command, the per-field settings help, the
    input type, the settings as JSON (``None`` values excluded), the original
    input and the current output.
    """

    settings_text = context.settings.model_dump_json(indent=2, exclude_none=True)
    settings_help = render_settings_help(context.settings)

    return f"""
User question:
{context.question}

--- PROMPT START ---

You are helping a user of the CLI tool `diffprep`.

Tool purpose:
`diffprep` preprocesses structured input such as JSON or XML so the output is more stable and useful for diffs.

User question:
{context.question}

Current command:
{context.current_command}

Settings (current values and env overrides):
{settings_help}

Input type:
{context.input_type.value}

Resolved settings:
{settings_text}

Original input:
{context.original_input}

Current output:
{context.normalized_output}

Please answer in a practical way.
Include:
1. what is likely missing or incorrect
2. the exact command, config, or environment variable change needed
3. a minimal working example
4. any common mistakes relevant to this case

--- PROMPT END ---
"""
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ from diffprep.core.logger import setup_logging
6
+
7
+ from .configs import (
8
+ CliOptions,
9
+ JsonSettings,
10
+ NormalizeSettings,
11
+ Settings,
12
+ XmlSettings,
13
+ )
14
+
15
+ _CURRENT_SETTINGS: Settings | None = None
16
+
17
+
18
def init_settings(cli_options: CliOptions) -> Settings:
    """Resolve the settings for this invocation and store them module-wide.

    Builds ``Settings`` with the given CLI options, lets the options apply
    their overrides, configures logging from the resulting logger settings,
    and remembers the result for ``get_settings``.

    Returns:
        The resolved settings.
    """
    global _CURRENT_SETTINGS

    base = Settings(cli_options=cli_options)
    resolved = base if cli_options is None else cli_options.apply_to(base)

    setup_logging(resolved.logger_settings)
    logging.debug("Settings initialized: \n%s\n", resolved.model_dump_json(indent=4))

    _CURRENT_SETTINGS = resolved
    return resolved
31
+
32
+
33
def get_settings() -> Settings:
    """Return the settings stored by ``init_settings``.

    Raises:
        RuntimeError: If no settings have been stored yet.
    """
    current = _CURRENT_SETTINGS
    if current is not None:
        return current
    raise RuntimeError("Settings have not been initialized.")
37
+
38
+
39
+ __all__ = [
40
+ "JsonSettings",
41
+ "NormalizeSettings",
42
+ "Settings",
43
+ "XmlSettings",
44
+ "get_settings",
45
+ "init_settings",
46
+ ]
@@ -0,0 +1,142 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from abc import ABC, abstractmethod
5
+ from typing import ClassVar, Literal, override
6
+
7
+ from pydantic import BaseModel, ConfigDict, Field
8
+ from pydantic_settings import (
9
+ BaseSettings,
10
+ PydanticBaseSettingsSource,
11
+ PyprojectTomlConfigSettingsSource,
12
+ SettingsConfigDict,
13
+ )
14
+
15
+ from diffprep.types import InputType
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class CliOptions(BaseModel, ABC):
    """Abstract base for per-command CLI options.

    Instances are frozen and reject unknown fields. Subclasses implement
    ``apply_to`` to merge their values into a ``Settings`` object.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # Kind of input the command processes.
    input_type: InputType
    # Optional user question; ``None`` when not given.
    question: str | None = None

    @abstractmethod
    def apply_to(self, settings: Settings) -> Settings:
        """Return a new Settings object with these CLI overrides applied."""
        raise NotImplementedError
30
+
31
+
32
class LoggerSettings(BaseModel):
    """Logging configuration values: level, format string and logger reset flag."""

    level: Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] = Field(
        default="INFO",
        description="Logging level.",
    )
    fmt: str = Field(
        default="%(filename)-16s:%(lineno)1d %(message)s",
        description="Logging format string.",
    )
    disable_existing_loggers: bool = Field(
        default=False,
        description="Disable pre-existing loggers when configuring logging.",
    )
45
+
46
+
47
class NormalizeSettings(BaseModel):
    """Output normalization options that are not specific to one format."""

    trailing_newline: bool = Field(
        default=True,
        description="Ensure normalized output ends with a trailing newline.",
    )
52
+
53
+
54
class JsonSettings(BaseModel):
    """Options controlling JSON scrubbing and serialization."""

    drop_keys: set[str] = Field(
        default_factory=set,
        description="JSON object keys to remove recursively before output.",
    )
    # ``None`` is allowed in addition to an int or a string.
    indent: int | str | None = Field(
        default=4,
        description="Indentation used for pretty JSON output.",
    )
    sort_keys: bool = Field(
        default=True,
        description="Sort JSON object keys for stable output.",
    )
    ensure_ascii: bool = Field(
        default=False,
        description="Escape non-ASCII characters in JSON output.",
    )
    style: Literal["pretty", "compact"] = Field(
        default="pretty",
        description="JSON output style.",
    )
75
+
76
+
77
class XmlSettings(BaseModel):
    """Options controlling XML scrubbing and serialization."""

    drop_tags: set[str] = Field(
        default_factory=set,
        description="XML tags to remove recursively before output.",
    )
    drop_attrs: set[str] = Field(
        default_factory=set,
        description="XML attributes to remove from all elements.",
    )
    # Must be zero or greater.
    indent: int = Field(
        default=4,
        ge=0,
        description="Indentation used for pretty XML output.",
    )
    pretty: bool = Field(
        default=True,
        description="Pretty-print XML output.",
    )
    declaration: bool = Field(
        default=False,
        description="Include XML declaration in serialized output.",
    )
    sort_attrs: bool = Field(
        default=True,
        description="Sort XML attributes for stable output.",
    )
    strip_text: bool = Field(
        default=True,
        description="Strip surrounding whitespace from XML text nodes where applicable.",
    )
107
+
108
+
109
class Settings(BaseSettings):
    """Top-level settings assembled from several sources.

    Environment variables use the ``DIFFPREP_`` prefix with ``__`` as the
    nested delimiter; ``pyproject.toml`` values are read from the
    ``[tool.diffprep]`` table. Unknown keys are ignored.
    """

    model_config: ClassVar[SettingsConfigDict] = SettingsConfigDict(
        extra="ignore",
        validate_assignment=True,
        pyproject_toml_table_header=("tool", "diffprep"),
        pyproject_toml_depth=3,
        env_prefix="DIFFPREP_",
        env_nested_delimiter="__",
    )

    normalize: NormalizeSettings = Field(default_factory=NormalizeSettings)
    json_settings: JsonSettings = Field(default_factory=JsonSettings)
    xml_settings: XmlSettings = Field(default_factory=XmlSettings)
    logger_settings: LoggerSettings = Field(default_factory=LoggerSettings)
    # Required: the options of the command being run.
    cli_options: CliOptions

    @classmethod
    @override
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Return the settings sources, adding a ``pyproject.toml`` source.

        The returned order is: init arguments, environment, dotenv,
        ``pyproject.toml``, file secrets.
        """
        pyproject_settings = PyprojectTomlConfigSettingsSource(settings_cls)
        return (
            init_settings,
            env_settings,
            dotenv_settings,
            pyproject_settings,
            file_secret_settings,
        )
@@ -0,0 +1,32 @@
1
+ import logging
2
+ import logging.config
3
+
4
+ from diffprep.core.configs import LoggerSettings
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
def build_logging_config(settings: LoggerSettings) -> dict[str, object]:
    """Build a ``logging.config.dictConfig`` dictionary from ``settings``.

    The configuration defines one ``default`` formatter using
    ``settings.fmt``, one ``console`` stream handler using that formatter,
    and a root logger at ``settings.level`` that writes to the handler.
    """
    formatter = {"format": settings.fmt}
    console_handler = {
        "class": "logging.StreamHandler",
        "formatter": "default",
    }
    root_logger = {
        "handlers": ["console"],
        "level": settings.level,
    }

    config: dict[str, object] = {
        "version": 1,
        "disable_existing_loggers": settings.disable_existing_loggers,
        "formatters": {"default": formatter},
        "handlers": {"console": console_handler},
        "root": root_logger,
    }
    return config
29
+
30
+
31
def setup_logging(settings: LoggerSettings) -> None:
    """Configure the ``logging`` module from ``settings``."""
    config = build_logging_config(settings)
    logging.config.dictConfig(config)
@@ -0,0 +1,33 @@
1
+ import importlib
2
+ import logging
3
+ import pkgutil
4
+
5
+ from ._registry import (
6
+ Processor,
7
+ get_processor,
8
+ register_processor,
9
+ validate_registered_processors,
10
+ )
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def _load_processors() -> None:
    """Import every public submodule of this package.

    Submodules whose name starts with an underscore are skipped. The list of
    discovered names is logged before any of them is imported.
    """
    discovered: list[str] = []
    for info in pkgutil.iter_modules(__path__):
        if info.name.startswith("_"):
            continue
        discovered.append(info.name)
    logger.debug("Discovered processor modules: %s", discovered)

    for short_name in discovered:
        module_name = f"{__name__}.{short_name}"
        logger.debug("Loading processor module: %s", module_name)
        importlib.import_module(module_name)
27
+
28
+
29
+ _load_processors()
30
+ logger.debug("Validating registered processors")
31
+ validate_registered_processors()
32
+
33
+ __all__ = ["Processor", "get_processor", "register_processor"]
@@ -0,0 +1,56 @@
1
+ from collections.abc import Callable, Mapping
2
+ from types import MappingProxyType
3
+
4
+ from diffprep.core import Settings
5
+ from diffprep.processors._types import (
6
+ DuplicateProcessorError,
7
+ Processor,
8
+ UnknownProcessorError,
9
+ )
10
+ from diffprep.types import InputType
11
+
12
+ # from diffprep.cli.models import NormalizeOptions
13
+
14
+ _PROCESSORS: dict[InputType, Processor] = {}
15
+
16
+
17
def register_processor(input_type: InputType) -> Callable[[Processor], Processor]:
    """Return a decorator that registers a processor for ``input_type``.

    The decorator stores the processor in the registry and returns it
    unchanged.

    Raises:
        DuplicateProcessorError: From the decorator, if ``input_type``
            already has a processor.
    """

    def decorator(processor: Processor) -> Processor:
        if input_type not in _PROCESSORS:
            _PROCESSORS[input_type] = processor
            return processor
        raise DuplicateProcessorError(
            f"Processor already registered for {input_type.value!r}"
        )

    return decorator
27
+
28
+
29
def get_processor(options: Settings, /) -> Processor:
    """Return the processor registered for the input type in ``options``.

    Raises:
        UnknownProcessorError: If no processor is registered for that input
            type; the message lists the registered types sorted by value.
    """
    requested = options.cli_options.input_type
    found = _PROCESSORS.get(requested)
    if found is not None:
        return found

    known = sorted(_PROCESSORS, key=lambda t: t.value)
    available = ", ".join(item.value for item in known)
    raise UnknownProcessorError(
        f"Unsupported input type {requested.value!r}. "
        f"Available: {available or '<none>'}"
    )
40
+
41
+
42
def list_processors() -> tuple[InputType, ...]:
    """Return the registered input types, sorted by their string value."""
    ordered = sorted(_PROCESSORS, key=lambda input_type: input_type.value)
    return tuple(ordered)
44
+
45
+
46
def processor_mapping() -> Mapping[InputType, Processor]:
    """Return a read-only view of the processor registry."""
    read_only_view = MappingProxyType(_PROCESSORS)
    return read_only_view
48
+
49
+
50
def validate_registered_processors() -> None:
    """Check that every ``InputType`` member has a registered processor.

    Raises:
        RuntimeError: If any member is unregistered; the message lists the
            missing values in sorted order.
    """
    unregistered = [member for member in InputType if member not in _PROCESSORS]
    if not unregistered:
        return

    missing_names = ", ".join(sorted(member.value for member in unregistered))
    raise RuntimeError(f"Missing processor registrations: {missing_names}")
@@ -0,0 +1,20 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+
5
+ from diffprep.core import Settings
6
+
7
+ type Processor = Callable[[bytes, Settings], bytes]
8
+ # type Processor = Callable[[bytes], bytes]
9
+
10
+
11
+ class ProcessorRegistryError(Exception):
12
+ """Base error for processor registry problems."""
13
+
14
+
15
+ class DuplicateProcessorError(ProcessorRegistryError):
16
+ """Raised when a processor is registered twice for the same input type."""
17
+
18
+
19
+ class UnknownProcessorError(ProcessorRegistryError):
20
+ """Raised when no processor exists for the requested input type."""
@@ -0,0 +1,66 @@
1
+ import json
2
+ import logging
3
+
4
+ from diffprep.core import Settings
5
+ from diffprep.core.configs import JsonSettings, NormalizeSettings
6
+ from diffprep.processors import register_processor
7
+ from diffprep.types import InputType, JSONValue
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def _decode_json(data: bytes) -> JSONValue:
    """Parse ``data`` as JSON.

    Raises:
        ValueError: If the bytes cannot be decoded or are not valid JSON; the
            original error is chained as the cause.
    """
    try:
        parsed = json.loads(data)
    except (UnicodeDecodeError, json.JSONDecodeError) as error:
        raise ValueError("Invalid JSON input") from error
    return parsed
17
+
18
+
19
def _scrub_json(value: JSONValue, drop_keys: set[str]) -> JSONValue:
    """Return a copy of ``value`` without any object key in ``drop_keys``.

    Dictionaries and lists are rebuilt recursively; a dropped key removes its
    whole subtree. Other values are returned as they are.
    """
    if isinstance(value, list):
        return [_scrub_json(item, drop_keys) for item in value]

    if not isinstance(value, dict):
        return value

    kept = {}
    for key, child in value.items():
        if key in drop_keys:
            continue
        kept[key] = _scrub_json(child, drop_keys)
    return kept
31
+
32
+
33
def _normalize_json(
    cleaned: JSONValue,
    *,
    json_settings: JsonSettings,
    normalize_settings: NormalizeSettings,
) -> bytes:
    """Serialize ``cleaned`` to UTF-8 encoded JSON according to the settings.

    The ``compact`` style uses ``(",", ":")`` separators; any other style
    leaves the separators to ``json.dumps``. A newline is appended when
    ``normalize_settings.trailing_newline`` is true.
    """
    if json_settings.style == "compact":
        separators = (",", ":")
    else:
        separators = None

    text = json.dumps(
        cleaned,
        sort_keys=json_settings.sort_keys,
        separators=separators,
        ensure_ascii=json_settings.ensure_ascii,
        indent=json_settings.indent,
    )
    suffix = "\n" if normalize_settings.trailing_newline else ""
    return (text + suffix).encode("utf-8")
53
+
54
+
55
+ @register_processor(InputType.JSON)
56
def process_json(data: bytes, settings: Settings) -> bytes:
    """Normalize JSON bytes for diffing.

    Parses ``data``, removes the keys listed in
    ``settings.json_settings.drop_keys`` at every nesting level, and
    serializes the result according to the JSON and normalize settings.

    Raises:
        ValueError: If ``data`` is not valid JSON.
    """
    # Log through this module's logger rather than the root ``logging``
    # module, so records are attributed to this module.
    logger.debug(settings)

    parsed = _decode_json(data)
    cleaned = _scrub_json(parsed, settings.json_settings.drop_keys)

    return _normalize_json(
        cleaned,
        json_settings=settings.json_settings,
        normalize_settings=settings.normalize,
    )
@@ -0,0 +1,15 @@
1
+ import logging
2
+
3
+ from diffprep.core import Settings
4
+ from diffprep.processors import register_processor
5
+ from diffprep.types import InputType
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ @register_processor(input_type=InputType.XML)
11
def process_xml(data: bytes, settings: Settings) -> bytes:
    """Return ``data`` unchanged after logging the settings.

    No XML normalization is applied here; the input bytes are passed through
    as they are.
    """
    logger.debug(settings)
    return data
diffprep/types.py ADDED
@@ -0,0 +1,10 @@
1
+ from enum import StrEnum
2
+
3
+ type JSONValue = (
4
+ dict[str, JSONValue] | list[JSONValue] | str | int | float | bool | None
5
+ )
6
+
7
+
8
+ class InputType(StrEnum):
9
+ JSON = "json"
10
+ XML = "xml"
@@ -0,0 +1,102 @@
1
+ Metadata-Version: 2.3
2
+ Name: diffprep
3
+ Version: 0.1.0a1
4
+ Summary: Command-line JSON and XML diff preprocessor.
5
+ Author: Dominik Ocsofszki
6
+ Author-email: Dominik Ocsofszki <dominikocsofszki@gmail.com>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2026 Dominik Ocsofszki
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+ Requires-Dist: lxml>=6.0.2
29
+ Requires-Dist: pydantic-settings>=2.13.1
30
+ Requires-Dist: typer>=0.24.1
31
+ Requires-Python: >=3.13
32
+ Description-Content-Type: text/markdown
33
+
34
+ # CLI
35
+
36
+ Command-line JSON and XML diff preprocessor.
37
+
38
+ **Usage**:
39
+
40
+ ```console
41
+ $ diffprep [OPTIONS] COMMAND [ARGS]...
42
+ ```
43
+
44
+ **Options**:
45
+
46
+ * `--install-completion`: Install completion for the current shell.
47
+ * `--show-completion`: Show completion for the current shell, to copy it or customize the installation.
48
+ * `--help`: Show this message and exit.
49
+
50
+ **Commands**:
51
+
52
+ * `json`
53
+ * `xml`
54
+ * `init`
55
+
56
+ ## `json`
57
+
58
+ **Usage**:
59
+
60
+ ```console
61
+ $ diffprep json [OPTIONS] PATH
62
+ ```
63
+
64
+ **Arguments**:
65
+
66
+ * `PATH`: [required]
67
+
68
+ **Options**:
69
+
70
+ * `-q, --question TEXT`
71
+ * `-d, --drop-key TEXT`: Remove one or more JSON keys before normalization.
72
+ * `--help`: Show this message and exit.
73
+
74
+ ## `xml`
75
+
76
+ **Usage**:
77
+
78
+ ```console
79
+ $ diffprep xml [OPTIONS] PATH
80
+ ```
81
+
82
+ **Arguments**:
83
+
84
+ * `PATH`: [required]
85
+
86
+ **Options**:
87
+
88
+ * `-q, --question TEXT`
89
+ * `--help`: Show this message and exit.
90
+
91
+ ## `init`
92
+
93
+ **Usage**:
94
+
95
+ ```console
96
+ $ diffprep init [OPTIONS] COMMAND [ARGS]...
97
+ ```
98
+
99
+ **Options**:
100
+
101
+ * `--help`: Show this message and exit.
102
+
@@ -0,0 +1,23 @@
1
+ diffprep/__init__.py,sha256=ae5023179478a2c3015a16db9290d04ded463f174638aee7bec69028aa39b9d3,86
2
+ diffprep/cli/app.py,sha256=bd5eb296358e0817873b75ce91e5a77b2f6cf5b679fa1ac4a8cf3673d1c79db1,428
3
+ diffprep/cli/commands/_normalize.py,sha256=5a397f1c3a183ef34a82ab03f6b9640167cc924dce1ef569df00828cb1928723,1127
4
+ diffprep/cli/commands/init.py,sha256=a42d42aa1c01c1be17da529fd734bbef881357954aba141356282b456582a5ce,3742
5
+ diffprep/cli/commands/json.py,sha256=2e79f65ec1ad0b3f77de8812427602bdf6a0fd4c936175645999f3cb9107b27b,984
6
+ diffprep/cli/commands/xml.py,sha256=694feaf318a22b5729ea38da9c88381508e2fe9a30541b9cc899e91e9fdf43e6,645
7
+ diffprep/cli/completion.py,sha256=2731db28cf73d32e7ad8397b31ba9ebe54e86ca80d9a501e4966241e7730432b,1026
8
+ diffprep/cli/io.py,sha256=ecf92f29c60ce670787f6d34ee0530cb8cd565f49bf4cd493f0a0151a6ac8ca4,251
9
+ diffprep/cli/models.py,sha256=02535abeab728ac3d05d2c499db9877cbd36b8a989bcd605210e0047d131b3b0,1838
10
+ diffprep/cli/prompting.py,sha256=19f20ef46befae7f4234dd6967b878be1cea32e771a79bcbe58e0e99b6c1e245,3536
11
+ diffprep/core/__init__.py,sha256=6e5ac018f653d13f10a47c68af4e12154004402abec8e16fda658c63f91a2753,961
12
+ diffprep/core/configs.py,sha256=daa7c4a41dd4d076a03d403362b8b420fca910ebd0063ab84c39435565afd226,4279
13
+ diffprep/core/logger.py,sha256=0e4010e4c99d4493e9a80d30cc47281f5514bc3e6662f1e2089aff25cf9b473c,808
14
+ diffprep/processors/__init__.py,sha256=0a6a68ffa8c21dfb998b46a92d051aecdbaf844ca5d36be028c92a80947b4c46,779
15
+ diffprep/processors/_registry.py,sha256=82d7a629a085c93577a3752706cbaa4faef56765160d96d6cd524e974a71b93c,1753
16
+ diffprep/processors/_types.py,sha256=e85ee032683798a7718a20f6f8217a73b5fa0158dbe654a28ff0833166936efb,566
17
+ diffprep/processors/json.py,sha256=f35770265735d021e2df8c1bb2fed6d1423bc26c28e3402e79ee4830b7a6626d,1755
18
+ diffprep/processors/xml.py,sha256=5eb4ee7a380ed778772e1da62acc435022c1af1b038cac111bc59c0a1eb7d105,349
19
+ diffprep/types.py,sha256=113c354a7d01effc3f43312c6e0ffa3fce683e6d554a343b15503920c8d189e4,186
20
+ diffprep-0.1.0a1.dist-info/WHEEL,sha256=607c46fee47e440c91332c738096ff0f5e54ca3b0818ee85462dd5172a38e793,79
21
+ diffprep-0.1.0a1.dist-info/entry_points.txt,sha256=a0f058d64ab9b216d0c0f467d6ef3c43f507cb04a55ba62746f839928d2557fa,51
22
+ diffprep-0.1.0a1.dist-info/METADATA,sha256=eccd31501288eeebf218ba2f2b6708cfa69bb6cb27914172617cb55faedccc4c,2559
23
+ diffprep-0.1.0a1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.7.19
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ diffprep = diffprep.cli.app:app
3
+