cryptic-md 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cryptic/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = '1.0.2'
cryptic/__main__.py ADDED
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : __main__.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+ # Date : 29.10.2024
7
+ # (c) Camille Scott, 2024
8
+
9
+
10
+ import sys
11
+
12
+ from .cmds import commands
13
+
14
+
15
def main():
    """Entry point: dispatch to the registered subcommand tree.

    Returns whatever `commands.run()` returns — presumably an integer
    exit code, since it is fed to `sys.exit` below (TODO confirm against
    the ponderosa CmdTree API).
    """
    return commands.run()


if __name__ == '__main__':
    # Propagate the command's return value as the process exit status.
    sys.exit(main())
cryptic/args.py ADDED
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : args.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+ # Date : 31.10.2024
7
+ # (c) Camille Scott, 2024
8
+
9
+ from argparse import Action
10
+ from enum import Enum
11
+ from pathlib import Path
12
+
13
+ from ponderosa import CmdTree, ArgParser, arggroup
14
+
15
+
16
class EnumAction(Action):
    """Argparse action that maps a member-name string onto an Enum member.

    Usage::

        parser.add_argument('--mode', type=MyEnum, action=EnumAction)

    The Enum class rides in via ``type=``; ``choices`` defaults to the
    Enum's member *names*, and the value stored on the namespace is the
    Enum member itself rather than the raw string.
    """
    def __init__(self, **kwargs):
        # Pop the Enum class off `type` so argparse does not try to call
        # the Enum on the raw string (lookup is by member name, not value).
        enum = kwargs.pop("type", None)

        # Ensure an Enum subclass is provided. The isinstance guard keeps
        # issubclass from raising its own confusing TypeError when `type`
        # is not a class at all (e.g. a function).
        if enum is None:
            raise ValueError("type must be assigned an Enum when using EnumAction")
        if not (isinstance(enum, type) and issubclass(enum, Enum)):
            raise TypeError("type must be an Enum when using EnumAction")

        # Generate choices from the Enum member names, unless the caller
        # explicitly supplied their own subset.
        kwargs.setdefault("choices", tuple(e.name for e in enum))

        super().__init__(**kwargs)

        self._enum = enum

    def __call__(self, parser, namespace, values, option_string=None):
        # Convert the (already choices-validated) name back into a member.
        setattr(namespace, self.dest, self._enum[values])
41
+
42
+
43
+ commands = CmdTree()
44
+
45
+
46
@commands.root.args("Config", common=True)
def common_args(parser: ArgParser):
    """Register the flags shared by every subcommand."""
    parser.add_argument(
        '--config',
        type=Path,
        default=None,
        help='Path to YAML config file.',
    )
    parser.add_argument(
        '--model',
        type=str,
        default=None,
        help='Override the configured default model.',
    )
    parser.add_argument(
        '--reasoning',
        type=str,
        default=None,
        choices=['low', 'medium', 'high', 'xhigh'],
        help='Override the configured reasoning effort.',
    )
55
+
cryptic/chat.py ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : chat.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+ # Date : 23.10.2024
7
+ # (c) Camille Scott, 2024
8
+
9
+ from typing import Type
10
+
11
+ from openai import AsyncOpenAI
12
+ from openai.types.chat import ChatCompletion
13
+
14
+ from .models import BaseNoteSummary, NoteSummary
15
+
16
+
17
async def summarize_page(
    client: AsyncOpenAI,
    content: str,
    *,
    model: str,
    system_prompt: str,
    reasoning: str,
    schema: Type[BaseNoteSummary] = NoteSummary,
) -> tuple[BaseNoteSummary | None, ChatCompletion]:
    """Ask the model to summarize one page of note content.

    Sends a structured-output chat request and returns the parsed summary
    (None when the model's reply could not be parsed into `schema`) along
    with the raw completion for token accounting.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": content},
    ]
    completion = await client.chat.completions.parse(
        model=model,
        messages=messages,
        response_format=schema,
        reasoning_effort=reasoning,
    )
    parsed = completion.choices[0].message.parsed
    return parsed, completion
cryptic/cmds.py ADDED
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : cmds.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+ # Date : 28.10.2024
7
+ # (c) Camille Scott, 2024
8
+
9
+ from argparse import Namespace
10
+ from pathlib import Path
11
+ import shutil
12
+
13
+ from dotenv import load_dotenv
14
+ from openai import AsyncOpenAI
15
+ from rich.console import Console
16
+
17
+ from .args import ArgParser, arggroup, commands, common_args, EnumAction
18
+ from .chat import summarize_page
19
+ from .config import AppConfig
20
+ from .models import NoteSummary, PageCategory, summary_schema_from_category
21
+ from .note import WebNote
22
+ from . import service as service_mod
23
+
24
+
25
def _resolve_model(args: Namespace, cfg: AppConfig, console: Console) -> str | None:
    """Resolve which model to use: CLI override first, config default second.

    Returns None (after printing an error) when the resolved model is not
    in the configured whitelist.
    """
    candidate = args.model if args.model else cfg.openai.default_model
    if candidate in cfg.openai.models:
        return candidate
    console.print(
        f'[red]Model {candidate!r} is not in configured models: '
        f'{cfg.openai.models}[/red]'
    )
    return None
34
+
35
+
36
def _resolve_reasoning(args: Namespace, cfg: AppConfig) -> str:
    """Resolve the reasoning effort: CLI flag wins over the config default."""
    if args.reasoning:
        return args.reasoning
    return cfg.openai.default_reasoning
38
+
39
+
40
@common_args.postprocessor()
def resolve_config(args: Namespace):
    """Load the AppConfig and resolve model/reasoning onto the parsed args.

    Registered as a postprocessor on the common argument group, so it runs
    after parsing for every command. Mutates `args` in place: sets
    `args.cfg`, replaces `args.model`/`args.reasoning` with resolved values,
    and loads a local `.env` (presumably carrying the OpenAI API key —
    verify against deployment docs). Raises on a missing/invalid config
    file or an unconfigured model.
    """
    console = Console(stderr=True)
    try:
        cfg = AppConfig.load(args.config)
    except (FileNotFoundError, ValueError) as e:
        # Surface the config problem to the user before propagating it.
        console.print(f'[red]{e}[/red]')
        raise

    args.model = _resolve_model(args, cfg, console)
    if args.model is None:
        # _resolve_model already printed the detailed error.
        raise ValueError('No model specified')

    args.reasoning = _resolve_reasoning(args, cfg)
    args.cfg = cfg

    load_dotenv()
57
+
58
+
59
@arggroup('Category')
def category_args(parser: ArgParser):
    """Optional --category flag; EnumAction converts the name to a PageCategory."""
    parser.add_argument('--category', '-c', type=PageCategory, action=EnumAction)
62
+
63
+
64
@category_args.apply()
@commands.register('process', 'note',
                   help='Process a note with the LLM and rewrite it.')
async def process_note(args: Namespace):
    """Summarize a single note with the LLM and rewrite it in place.

    Skips notes already marked processed unless --force is given. With
    --backup, the original is copied to a `.bak` sibling before rewriting.
    Returns 0 on success, 1 on failure (shell exit-code convention).
    """
    console = Console(stderr=True)

    console.log(f'Load {args.note}...')
    note = WebNote(args.note)

    # Avoid double-processing unless the user explicitly forces it.
    if note.cryptic_processed and not args.force:
        console.log('[red] Note already processed and not --force, exiting.')
        return 1

    if args.category:
        # A forced category selects its specialized summary schema.
        schema = summary_schema_from_category(args.category)
        console.log(f'[yellow] Forcing {schema} as Schema')
    else:
        schema = NoteSummary

    client = AsyncOpenAI()
    try:
        # Plain string literals here: the originals were f-strings with no
        # placeholders (ruff F541).
        with console.status('[bold blue]Wait for OpenAI response...'):
            summary, completion = await summarize_page(
                client,
                note.content,
                model=args.model,
                system_prompt=args.cfg.prompt.text,
                reasoning=args.reasoning,
                schema=schema,
            )
    finally:
        # Always release the HTTP resources, even if the request failed.
        await client.close()

    if summary is None:
        console.print('[red] Error processing note!')
        return 1

    console.log(f'Processed note using {completion.usage.total_tokens} tokens.')
    console.print(summary)

    if args.backup:
        console.log('Backup note...')
        shutil.copy(args.note, args.note.with_suffix('.bak'))

    console.log('Update and save note...')
    note.process_summary(summary)
    note.save()

    console.rule('Processed Note')
    note.to_console(console)

    return 0
117
+
118
+
119
@process_note.args()
def _(parser: ArgParser):
    """Arguments specific to `process note`."""
    # Input note to process (required).
    parser.add_argument('--note', '-i', type=Path, required=True)
    # Re-process even if the note is already marked processed.
    parser.add_argument('--force', '-f', default=False, action='store_true')
    # Copy the original to <note>.bak before rewriting.
    parser.add_argument('--backup', '-b', default=False, action='store_true')
124
+
125
+
126
@commands.register('service',
                   help='Watch configured vault directories and process new notes.')
async def service_cmd(args: Namespace):
    """Run the watcher service over the configured vaults.

    Requires a `service:` section in the config; the --max-concurrent flag,
    when given, overrides the configured concurrency limit.
    """
    console = Console(stderr=True)

    svc = args.cfg.require_service()
    # CLI override takes precedence over the configured limit.
    if args.max_concurrent is not None:
        svc.max_concurrent = args.max_concurrent

    result = await service_mod.run(
        console=console,
        cfg=args.cfg,
        svc=svc,
        model=args.model,
        reasoning=args.reasoning,
        once=args.once,
    )
    return result
143
+
144
+
145
@service_cmd.args()
def _(parser: ArgParser):
    """Arguments specific to `service`."""
    parser.add_argument('--max-concurrent', type=int, default=None,
                        help='Override service.max_concurrent from config.')
    parser.add_argument('--once', default=False, action='store_true',
                        help='Drain existing files and exit instead of watching.')
cryptic/config.py ADDED
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : config.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+
7
+ from __future__ import annotations
8
+
9
+ from importlib.resources import files as _pkg_files
10
+ import os
11
+ from pathlib import Path
12
+ from typing import Literal, Self
13
+
14
+ import yaml
15
+ from pydantic import BaseModel, Field, ValidationError, model_validator
16
+
17
+
18
+ DEFAULT_MODELS = ['gpt-5.4-mini', 'gpt-5.4-nano-2026-03-17']
19
+ DEFAULT_MODEL = 'gpt-5.4-mini'
20
+
21
+ ReasoningLevel = Literal['low', 'medium', 'high', 'xhigh']
22
+ REASONING_LEVELS: tuple[ReasoningLevel, ...] = ('low', 'medium', 'high', 'xhigh')
23
+ DEFAULT_REASONING: ReasoningLevel = 'medium'
24
+
25
+
26
+ def _xdg_default_config_path() -> Path:
27
+ base = os.environ.get('XDG_CONFIG_HOME')
28
+ root = Path(base) if base else Path.home() / '.config'
29
+ return root / 'cryptic' / 'config.yaml'
30
+
31
+
32
def _packaged_prompt_text() -> str:
    """Read the default categorization prompt bundled with the package."""
    prompt_file = _pkg_files('cryptic') / 'prompts' / 'categorize.txt'
    return prompt_file.read_text(encoding='utf-8').strip()
34
+
35
+
36
class OpenAICfg(BaseModel):
    """OpenAI-related settings: the model whitelist and defaults."""
    # Models the CLI is allowed to select.
    models: list[str] = Field(default_factory=lambda: list(DEFAULT_MODELS))
    default_model: str = DEFAULT_MODEL
    default_reasoning: ReasoningLevel = DEFAULT_REASONING

    @model_validator(mode='after')
    def _default_in_models(self) -> Self:
        """Reject configs whose default model is missing from the whitelist."""
        if self.default_model not in self.models:
            raise ValueError(
                f'default_model {self.default_model!r} is not in openai.models {self.models}'
            )
        return self
48
+
49
+
50
class PromptCfg(BaseModel):
    """System-prompt source: an external file (`path`) or inline `text`."""
    # Path to a prompt file; mutually exclusive with `text`.
    path: Path | None = None
    # Inline prompt text; populated from `path` after validation.
    text: str | None = None

    @model_validator(mode='after')
    def _resolve(self) -> Self:
        """Enforce path/text exclusivity and load the file contents into `text`."""
        if self.path is not None and self.text is not None:
            raise ValueError('prompt: set exactly one of `path` or `text`, not both')
        if self.path is not None:
            resolved = Path(self.path).expanduser().resolve()
            # Cache the file contents so downstream code only ever reads `text`.
            self.text = resolved.read_text(encoding='utf-8').strip()
        return self
62
+
63
+
64
class VaultCfg(BaseModel):
    """One watched vault: where notes arrive, land, and are archived."""
    input_dir: Path
    output_dir: Path
    originals_dir: Path
    # Back-filled by ServiceCfg from the vault's key in the config mapping.
    name: str | None = None

    @model_validator(mode='after')
    def _expand(self) -> Self:
        """Normalize every directory to an absolute, user-expanded path."""
        self.input_dir = Path(self.input_dir).expanduser().resolve()
        self.output_dir = Path(self.output_dir).expanduser().resolve()
        self.originals_dir = Path(self.originals_dir).expanduser().resolve()
        return self
76
+
77
+
78
class ServiceCfg(BaseModel):
    """Settings for the directory-watching service."""
    vaults: dict[str, VaultCfg]
    # Maximum notes processed concurrently.
    max_concurrent: int = 3
    # Attempts per note before giving up.
    max_tries: int = 3
    # Grace period (seconds) before picking up a newly seen file.
    pickup_delay_seconds: float = 3.0

    @model_validator(mode='after')
    def _check(self) -> Self:
        """Back-fill vault names and validate uniqueness and numeric bounds."""
        if not self.vaults:
            raise ValueError('service.vaults must define at least one vault')
        seen: dict[Path, str] = {}
        for name, vault in self.vaults.items():
            # Give each vault its mapping key as a display name.
            vault.name = name
            # Two vaults watching the same directory would double-process files.
            if vault.input_dir in seen:
                raise ValueError(
                    f'vaults {seen[vault.input_dir]!r} and {name!r} share '
                    f'input_dir {vault.input_dir}; input_dirs must be distinct'
                )
            seen[vault.input_dir] = name
        if self.max_concurrent < 1:
            raise ValueError('service.max_concurrent must be >= 1')
        if self.max_tries < 1:
            raise ValueError('service.max_tries must be >= 1')
        if self.pickup_delay_seconds < 0:
            raise ValueError('service.pickup_delay_seconds must be >= 0')
        return self
104
+
105
+
106
class AppConfig(BaseModel):
    """Top-level application configuration: openai + prompt + optional service."""
    openai: OpenAICfg = Field(default_factory=OpenAICfg)
    prompt: PromptCfg = Field(default_factory=PromptCfg)
    # Only required when running the watcher service; see require_service().
    service: ServiceCfg | None = None

    @model_validator(mode='after')
    def _default_prompt(self) -> Self:
        """Fall back to the packaged prompt when none is configured."""
        if self.prompt.text is None and self.prompt.path is None:
            self.prompt = PromptCfg(text=_packaged_prompt_text())
        return self

    @classmethod
    def load(cls, path: Path | None) -> AppConfig:
        """Load config from `path`, the XDG default location, or built-in defaults.

        Raises FileNotFoundError when an explicit `path` does not exist and
        ValueError when the YAML fails validation.
        """
        if path is not None:
            path = Path(path).expanduser().resolve()
            if not path.exists():
                raise FileNotFoundError(f'config file not found: {path}')
            return cls._from_file(path)

        default = _xdg_default_config_path()
        if default.exists():
            return cls._from_file(default)

        # No config anywhere: every section has usable defaults.
        return cls()

    @classmethod
    def _from_file(cls, path: Path) -> AppConfig:
        """Parse and validate a YAML config file."""
        with path.open('r', encoding='utf-8') as fp:
            # An empty file parses to None; treat it as an empty mapping.
            raw = yaml.safe_load(fp) or {}
        try:
            return cls.model_validate(raw)
        except ValidationError as e:
            # Re-raise as ValueError so the CLI prints one friendly message.
            raise ValueError(f'invalid config at {path}:\n{e}') from e

    def require_service(self) -> ServiceCfg:
        """Return the service config, or raise with setup guidance."""
        if self.service is None:
            raise ValueError(
                'service config required: add a `service:` section to your config.yaml '
                f'(default location: {_xdg_default_config_path()})'
            )
        return self.service
cryptic/markdown.py ADDED
@@ -0,0 +1,115 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : markdown.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Any, Callable, Literal
11
+
12
+ from pydantic import BaseModel
13
+ from pydantic.fields import FieldInfo
14
+
15
+
16
+ Style = Literal['paragraph', 'bullets', 'numbered']
17
+
18
+
19
@dataclass(frozen=True)
class MdSection:
    """Render this field as a section in the note body.

    Header text comes from the field's `serialization_alias`. If neither
    `header` nor `serialization_alias` is set, the section is emitted
    without a header.

    When `skip_empty` is True (the default), a string value of 'unknown'
    or 'none' (case-insensitive) also suppresses the entire section,
    header included. Truly empty values (None, empty string, empty list)
    are always suppressed regardless of `skip_empty`.
    """
    # Markdown heading level ('##' for depth 2).
    depth: int = 2
    # Body layout: prose, '-' bullets, or '1.' numbering.
    style: Style = 'paragraph'
    # Explicit header override; takes precedence over serialization_alias.
    header: str | None = None
    skip_empty: bool = True
36
+
37
+
38
@dataclass(frozen=True)
class MdSkip:
    """Explicit marker: this field is intentionally not in the body."""
42
+
43
+
44
@dataclass(frozen=True)
class MdFrontmatter:
    """Map this field into the note's YAML frontmatter under `key`.

    A field can carry multiple MdFrontmatter annotations to write the
    same source value to several frontmatter keys.
    """
    # Frontmatter key to write.
    key: str
    # Optional converter applied to the dumped value before writing.
    transform: Callable[[Any], Any] | None = None
53
+
54
+
55
def render(model: BaseModel) -> str:
    """Render the model's markdown-annotated fields as a note body.

    Fields without markdown metadata, fields marked MdSkip, and fields
    whose values _should_skip deems empty are omitted. Sections are
    joined by blank lines, in field-declaration order.
    """
    sections: list[str] = []
    for field_name, field_info in type(model).model_fields.items():
        meta = _md_meta(field_info)
        if meta is None or isinstance(meta, MdSkip):
            continue
        value = getattr(model, field_name)
        if _should_skip(value, meta):
            continue
        header = _header_for(meta, field_info)
        sections.append(_render_section(value, meta, header))
    return '\n\n'.join(sections)
66
+
67
+
68
def apply_frontmatter(model: BaseModel, target: dict[str, Any]) -> None:
    """Write MdFrontmatter-annotated field values into `target`.

    Values come from model.model_dump(); each MdFrontmatter annotation on
    a field writes the (optionally transformed) value under its key, so
    one field can populate several keys. Mutates `target` in place.
    """
    dumped = model.model_dump()
    for field_name, field_info in type(model).model_fields.items():
        value = dumped.get(field_name)
        annotations = (m for m in field_info.metadata
                       if isinstance(m, MdFrontmatter))
        for annotation in annotations:
            if annotation.transform:
                target[annotation.key] = annotation.transform(value)
            else:
                target[annotation.key] = value
76
+
77
+
78
def _md_meta(fi: FieldInfo) -> MdSection | MdSkip | None:
    """Return the first markdown annotation on the field, or None."""
    return next(
        (m for m in fi.metadata if isinstance(m, (MdSection, MdSkip))),
        None,
    )
83
+
84
+
85
+ def _header_for(meta: MdSection, fi: FieldInfo) -> str | None:
86
+ return meta.header or fi.serialization_alias
87
+
88
+
89
+ _EMPTY_SENTINELS = frozenset({'unknown', 'none'})
90
+
91
+
92
+ def _should_skip(value: Any, meta: MdSection) -> bool:
93
+ if value is None:
94
+ return True
95
+ if isinstance(value, str) and not value.strip():
96
+ return True
97
+ if isinstance(value, (list, tuple)) and not value:
98
+ return True
99
+ if meta.skip_empty and isinstance(value, str):
100
+ if value.strip().lower() in _EMPTY_SENTINELS:
101
+ return True
102
+ return False
103
+
104
+
105
+ def _render_section(value: Any, meta: MdSection, header: str | None) -> str:
106
+ parts: list[str] = []
107
+ if header:
108
+ parts.append('#' * meta.depth + ' ' + header)
109
+ if meta.style == 'paragraph':
110
+ parts.append(str(value))
111
+ elif meta.style == 'bullets':
112
+ parts.append('\n'.join(f'- {item}' for item in value))
113
+ elif meta.style == 'numbered':
114
+ parts.append('\n'.join(f'{i + 1}. {item}' for i, item in enumerate(value)))
115
+ return '\n\n'.join(parts)