cryptic-md 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cryptic/__init__.py ADDED
@@ -0,0 +1 @@
1
+ __version__ = '1.0.2'
cryptic/__main__.py ADDED
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : __main__.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+ # Date : 29.10.2024
7
+ # (c) Camille Scott, 2024
8
+
9
+
10
+ import sys
11
+
12
+ from .cmds import commands
13
+
14
+
15
def main():
    """Entry point: dispatch to the registered subcommand tree.

    Returns whatever `commands.run()` returns — presumably an integer
    exit code, since it is fed to `sys.exit` below (TODO confirm against
    the ponderosa CmdTree API).
    """
    return commands.run()


if __name__ == '__main__':
    # Propagate the command's return value as the process exit status.
    sys.exit(main())
cryptic/args.py ADDED
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : args.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+ # Date : 31.10.2024
7
+ # (c) Camille Scott, 2024
8
+
9
+ from argparse import Action
10
+ from enum import Enum
11
+ from pathlib import Path
12
+
13
+ from ponderosa import CmdTree, ArgParser, arggroup
14
+
15
+
16
class EnumAction(Action):
    """Argparse action that maps a member-name string onto an Enum member.

    Usage::

        parser.add_argument('--mode', type=MyEnum, action=EnumAction)

    The Enum class rides in via ``type=``; ``choices`` defaults to the
    Enum's member *names*, and the value stored on the namespace is the
    Enum member itself rather than the raw string.
    """
    def __init__(self, **kwargs):
        # Pop the Enum class off `type` so argparse does not try to call
        # the Enum on the raw string (lookup is by member name, not value).
        enum = kwargs.pop("type", None)

        # Ensure an Enum subclass is provided. The isinstance guard keeps
        # issubclass from raising its own confusing TypeError when `type`
        # is not a class at all (e.g. a function).
        if enum is None:
            raise ValueError("type must be assigned an Enum when using EnumAction")
        if not (isinstance(enum, type) and issubclass(enum, Enum)):
            raise TypeError("type must be an Enum when using EnumAction")

        # Generate choices from the Enum member names, unless the caller
        # explicitly supplied their own subset.
        kwargs.setdefault("choices", tuple(e.name for e in enum))

        super().__init__(**kwargs)

        self._enum = enum

    def __call__(self, parser, namespace, values, option_string=None):
        # Convert the (already choices-validated) name back into a member.
        setattr(namespace, self.dest, self._enum[values])
41
+
42
+
43
+ commands = CmdTree()
44
+
45
+
46
@commands.root.args("Config", common=True)
def common_args(parser: ArgParser):
    """Register the flags shared by every subcommand."""
    parser.add_argument(
        '--config',
        type=Path,
        default=None,
        help='Path to YAML config file.',
    )
    parser.add_argument(
        '--model',
        type=str,
        default=None,
        help='Override the configured default model.',
    )
    parser.add_argument(
        '--reasoning',
        type=str,
        default=None,
        choices=['low', 'medium', 'high', 'xhigh'],
        help='Override the configured reasoning effort.',
    )
55
+
cryptic/chat.py ADDED
@@ -0,0 +1,35 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : chat.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+ # Date : 23.10.2024
7
+ # (c) Camille Scott, 2024
8
+
9
+ from typing import Type
10
+
11
+ from openai import AsyncOpenAI
12
+ from openai.types.chat import ChatCompletion
13
+
14
+ from .models import BaseNoteSummary, NoteSummary
15
+
16
+
17
async def summarize_page(
    client: AsyncOpenAI,
    content: str,
    *,
    model: str,
    system_prompt: str,
    reasoning: str,
    schema: Type[BaseNoteSummary] = NoteSummary,
) -> tuple[BaseNoteSummary | None, ChatCompletion]:
    """Ask the model to summarize one page of note content.

    Sends a structured-output chat request and returns the parsed summary
    (None when the model's reply could not be parsed into `schema`) along
    with the raw completion for token accounting.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": content},
    ]
    completion = await client.chat.completions.parse(
        model=model,
        messages=messages,
        response_format=schema,
        reasoning_effort=reasoning,
    )
    parsed = completion.choices[0].message.parsed
    return parsed, completion
cryptic/cmds.py ADDED
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : cmds.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+ # Date : 28.10.2024
7
+ # (c) Camille Scott, 2024
8
+
9
+ from argparse import Namespace
10
+ from pathlib import Path
11
+ import shutil
12
+
13
+ from dotenv import load_dotenv
14
+ from openai import AsyncOpenAI
15
+ from rich.console import Console
16
+
17
+ from .args import ArgParser, arggroup, commands, common_args, EnumAction
18
+ from .chat import summarize_page
19
+ from .config import AppConfig
20
+ from .models import NoteSummary, PageCategory, summary_schema_from_category
21
+ from .note import WebNote
22
+ from . import service as service_mod
23
+
24
+
25
def _resolve_model(args: Namespace, cfg: AppConfig, console: Console) -> str | None:
    """Resolve which model to use: CLI override first, config default second.

    Returns None (after printing an error) when the resolved model is not
    in the configured whitelist.
    """
    candidate = args.model if args.model else cfg.openai.default_model
    if candidate in cfg.openai.models:
        return candidate
    console.print(
        f'[red]Model {candidate!r} is not in configured models: '
        f'{cfg.openai.models}[/red]'
    )
    return None
34
+
35
+
36
def _resolve_reasoning(args: Namespace, cfg: AppConfig) -> str:
    """Resolve the reasoning effort: CLI flag wins over the config default."""
    if args.reasoning:
        return args.reasoning
    return cfg.openai.default_reasoning
38
+
39
+
40
@common_args.postprocessor()
def resolve_config(args: Namespace):
    """Load the AppConfig and resolve model/reasoning onto the parsed args.

    Registered as a postprocessor on the common argument group, so it runs
    after parsing for every command. Mutates `args` in place: sets
    `args.cfg`, replaces `args.model`/`args.reasoning` with resolved values,
    and loads a local `.env` (presumably carrying the OpenAI API key —
    verify against deployment docs). Raises on a missing/invalid config
    file or an unconfigured model.
    """
    console = Console(stderr=True)
    try:
        cfg = AppConfig.load(args.config)
    except (FileNotFoundError, ValueError) as e:
        # Surface the config problem to the user before propagating it.
        console.print(f'[red]{e}[/red]')
        raise

    args.model = _resolve_model(args, cfg, console)
    if args.model is None:
        # _resolve_model already printed the detailed error.
        raise ValueError('No model specified')

    args.reasoning = _resolve_reasoning(args, cfg)
    args.cfg = cfg

    load_dotenv()
57
+
58
+
59
@arggroup('Category')
def category_args(parser: ArgParser):
    """Optional --category flag; EnumAction converts the name to a PageCategory."""
    parser.add_argument('--category', '-c', type=PageCategory, action=EnumAction)
62
+
63
+
64
@category_args.apply()
@commands.register('process', 'note',
                   help='Process a note with the LLM and rewrite it.')
async def process_note(args: Namespace):
    """Summarize a single note with the LLM and rewrite it in place.

    Skips notes already marked processed unless --force is given. With
    --backup, the original is copied to a `.bak` sibling before rewriting.
    Returns 0 on success, 1 on failure (shell exit-code convention).
    """
    console = Console(stderr=True)

    console.log(f'Load {args.note}...')
    note = WebNote(args.note)

    # Avoid double-processing unless the user explicitly forces it.
    if note.cryptic_processed and not args.force:
        console.log('[red] Note already processed and not --force, exiting.')
        return 1

    if args.category:
        # A forced category selects its specialized summary schema.
        schema = summary_schema_from_category(args.category)
        console.log(f'[yellow] Forcing {schema} as Schema')
    else:
        schema = NoteSummary

    client = AsyncOpenAI()
    try:
        # Plain string literals here: the originals were f-strings with no
        # placeholders (ruff F541).
        with console.status('[bold blue]Wait for OpenAI response...'):
            summary, completion = await summarize_page(
                client,
                note.content,
                model=args.model,
                system_prompt=args.cfg.prompt.text,
                reasoning=args.reasoning,
                schema=schema,
            )
    finally:
        # Always release the HTTP resources, even if the request failed.
        await client.close()

    if summary is None:
        console.print('[red] Error processing note!')
        return 1

    console.log(f'Processed note using {completion.usage.total_tokens} tokens.')
    console.print(summary)

    if args.backup:
        console.log('Backup note...')
        shutil.copy(args.note, args.note.with_suffix('.bak'))

    console.log('Update and save note...')
    note.process_summary(summary)
    note.save()

    console.rule('Processed Note')
    note.to_console(console)

    return 0
117
+
118
+
119
@process_note.args()
def _(parser: ArgParser):
    """Arguments specific to `process note`."""
    # Input note to process (required).
    parser.add_argument('--note', '-i', type=Path, required=True)
    # Re-process even if the note is already marked processed.
    parser.add_argument('--force', '-f', default=False, action='store_true')
    # Copy the original to <note>.bak before rewriting.
    parser.add_argument('--backup', '-b', default=False, action='store_true')
124
+
125
+
126
@commands.register('service',
                   help='Watch configured vault directories and process new notes.')
async def service_cmd(args: Namespace):
    """Run the watcher service over the configured vaults.

    Requires a `service:` section in the config; the --max-concurrent flag,
    when given, overrides the configured concurrency limit.
    """
    console = Console(stderr=True)

    svc = args.cfg.require_service()
    # CLI override takes precedence over the configured limit.
    if args.max_concurrent is not None:
        svc.max_concurrent = args.max_concurrent

    result = await service_mod.run(
        console=console,
        cfg=args.cfg,
        svc=svc,
        model=args.model,
        reasoning=args.reasoning,
        once=args.once,
    )
    return result
143
+
144
+
145
@service_cmd.args()
def _(parser: ArgParser):
    """Arguments specific to `service`."""
    parser.add_argument('--max-concurrent', type=int, default=None,
                        help='Override service.max_concurrent from config.')
    parser.add_argument('--once', default=False, action='store_true',
                        help='Drain existing files and exit instead of watching.')
cryptic/config.py ADDED
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : config.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+
7
+ from __future__ import annotations
8
+
9
+ from importlib.resources import files as _pkg_files
10
+ import os
11
+ from pathlib import Path
12
+ from typing import Literal, Self
13
+
14
+ import yaml
15
+ from pydantic import BaseModel, Field, ValidationError, model_validator
16
+
17
+
18
+ DEFAULT_MODELS = ['gpt-5.4-mini', 'gpt-5.4-nano-2026-03-17']
19
+ DEFAULT_MODEL = 'gpt-5.4-mini'
20
+
21
+ ReasoningLevel = Literal['low', 'medium', 'high', 'xhigh']
22
+ REASONING_LEVELS: tuple[ReasoningLevel, ...] = ('low', 'medium', 'high', 'xhigh')
23
+ DEFAULT_REASONING: ReasoningLevel = 'medium'
24
+
25
+
26
+ def _xdg_default_config_path() -> Path:
27
+ base = os.environ.get('XDG_CONFIG_HOME')
28
+ root = Path(base) if base else Path.home() / '.config'
29
+ return root / 'cryptic' / 'config.yaml'
30
+
31
+
32
def _packaged_prompt_text() -> str:
    """Read the default categorization prompt bundled with the package."""
    prompt_file = _pkg_files('cryptic') / 'prompts' / 'categorize.txt'
    return prompt_file.read_text(encoding='utf-8').strip()
34
+
35
+
36
class OpenAICfg(BaseModel):
    """OpenAI-related settings: the model whitelist and defaults."""
    # Models the CLI is allowed to select.
    models: list[str] = Field(default_factory=lambda: list(DEFAULT_MODELS))
    default_model: str = DEFAULT_MODEL
    default_reasoning: ReasoningLevel = DEFAULT_REASONING

    @model_validator(mode='after')
    def _default_in_models(self) -> Self:
        """Reject configs whose default model is missing from the whitelist."""
        if self.default_model not in self.models:
            raise ValueError(
                f'default_model {self.default_model!r} is not in openai.models {self.models}'
            )
        return self
48
+
49
+
50
class PromptCfg(BaseModel):
    """System-prompt source: an external file (`path`) or inline `text`."""
    # Path to a prompt file; mutually exclusive with `text`.
    path: Path | None = None
    # Inline prompt text; populated from `path` after validation.
    text: str | None = None

    @model_validator(mode='after')
    def _resolve(self) -> Self:
        """Enforce path/text exclusivity and load the file contents into `text`."""
        if self.path is not None and self.text is not None:
            raise ValueError('prompt: set exactly one of `path` or `text`, not both')
        if self.path is not None:
            resolved = Path(self.path).expanduser().resolve()
            # Cache the file contents so downstream code only ever reads `text`.
            self.text = resolved.read_text(encoding='utf-8').strip()
        return self
62
+
63
+
64
class VaultCfg(BaseModel):
    """One watched vault: where notes arrive, land, and are archived."""
    input_dir: Path
    output_dir: Path
    originals_dir: Path
    # Back-filled by ServiceCfg from the vault's key in the config mapping.
    name: str | None = None

    @model_validator(mode='after')
    def _expand(self) -> Self:
        """Normalize every directory to an absolute, user-expanded path."""
        self.input_dir = Path(self.input_dir).expanduser().resolve()
        self.output_dir = Path(self.output_dir).expanduser().resolve()
        self.originals_dir = Path(self.originals_dir).expanduser().resolve()
        return self
76
+
77
+
78
class ServiceCfg(BaseModel):
    """Settings for the directory-watching service."""
    vaults: dict[str, VaultCfg]
    # Maximum notes processed concurrently.
    max_concurrent: int = 3
    # Attempts per note before giving up.
    max_tries: int = 3
    # Grace period (seconds) before picking up a newly seen file.
    pickup_delay_seconds: float = 3.0

    @model_validator(mode='after')
    def _check(self) -> Self:
        """Back-fill vault names and validate uniqueness and numeric bounds."""
        if not self.vaults:
            raise ValueError('service.vaults must define at least one vault')
        seen: dict[Path, str] = {}
        for name, vault in self.vaults.items():
            # Give each vault its mapping key as a display name.
            vault.name = name
            # Two vaults watching the same directory would double-process files.
            if vault.input_dir in seen:
                raise ValueError(
                    f'vaults {seen[vault.input_dir]!r} and {name!r} share '
                    f'input_dir {vault.input_dir}; input_dirs must be distinct'
                )
            seen[vault.input_dir] = name
        if self.max_concurrent < 1:
            raise ValueError('service.max_concurrent must be >= 1')
        if self.max_tries < 1:
            raise ValueError('service.max_tries must be >= 1')
        if self.pickup_delay_seconds < 0:
            raise ValueError('service.pickup_delay_seconds must be >= 0')
        return self
104
+
105
+
106
class AppConfig(BaseModel):
    """Top-level application configuration: openai + prompt + optional service."""
    openai: OpenAICfg = Field(default_factory=OpenAICfg)
    prompt: PromptCfg = Field(default_factory=PromptCfg)
    # Only required when running the watcher service; see require_service().
    service: ServiceCfg | None = None

    @model_validator(mode='after')
    def _default_prompt(self) -> Self:
        """Fall back to the packaged prompt when none is configured."""
        if self.prompt.text is None and self.prompt.path is None:
            self.prompt = PromptCfg(text=_packaged_prompt_text())
        return self

    @classmethod
    def load(cls, path: Path | None) -> AppConfig:
        """Load config from `path`, the XDG default location, or built-in defaults.

        Raises FileNotFoundError when an explicit `path` does not exist and
        ValueError when the YAML fails validation.
        """
        if path is not None:
            path = Path(path).expanduser().resolve()
            if not path.exists():
                raise FileNotFoundError(f'config file not found: {path}')
            return cls._from_file(path)

        default = _xdg_default_config_path()
        if default.exists():
            return cls._from_file(default)

        # No config anywhere: every section has usable defaults.
        return cls()

    @classmethod
    def _from_file(cls, path: Path) -> AppConfig:
        """Parse and validate a YAML config file."""
        with path.open('r', encoding='utf-8') as fp:
            # An empty file parses to None; treat it as an empty mapping.
            raw = yaml.safe_load(fp) or {}
        try:
            return cls.model_validate(raw)
        except ValidationError as e:
            # Re-raise as ValueError so the CLI prints one friendly message.
            raise ValueError(f'invalid config at {path}:\n{e}') from e

    def require_service(self) -> ServiceCfg:
        """Return the service config, or raise with setup guidance."""
        if self.service is None:
            raise ValueError(
                'service config required: add a `service:` section to your config.yaml '
                f'(default location: {_xdg_default_config_path()})'
            )
        return self.service
cryptic/markdown.py ADDED
@@ -0,0 +1,115 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # File : markdown.py
4
+ # License: MIT
5
+ # Author : Camille Scott <camille.scott.w@gmail.com>
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Any, Callable, Literal
11
+
12
+ from pydantic import BaseModel
13
+ from pydantic.fields import FieldInfo
14
+
15
+
16
+ Style = Literal['paragraph', 'bullets', 'numbered']
17
+
18
+
19
@dataclass(frozen=True)
class MdSection:
    """Render this field as a section in the note body.

    Header text comes from the field's `serialization_alias`. If neither
    `header` nor `serialization_alias` is set, the section is emitted
    without a header.

    When `skip_empty` is True (the default), a string value of 'unknown'
    or 'none' (case-insensitive) also suppresses the entire section,
    header included. Truly empty values (None, empty string, empty list)
    are always suppressed regardless of `skip_empty`.
    """
    # Markdown heading level ('##' for depth 2).
    depth: int = 2
    # Body layout: prose, '-' bullets, or '1.' numbering.
    style: Style = 'paragraph'
    # Explicit header override; takes precedence over serialization_alias.
    header: str | None = None
    skip_empty: bool = True
36
+
37
+
38
@dataclass(frozen=True)
class MdSkip:
    """Explicit marker: this field is intentionally not in the body."""
42
+
43
+
44
@dataclass(frozen=True)
class MdFrontmatter:
    """Map this field into the note's YAML frontmatter under `key`.

    A field can carry multiple MdFrontmatter annotations to write the
    same source value to several frontmatter keys.
    """
    # Frontmatter key to write.
    key: str
    # Optional converter applied to the dumped value before writing.
    transform: Callable[[Any], Any] | None = None
53
+
54
+
55
def render(model: BaseModel) -> str:
    """Render the model's markdown-annotated fields as a note body.

    Fields without markdown metadata, fields marked MdSkip, and fields
    whose values _should_skip deems empty are omitted. Sections are
    joined by blank lines, in field-declaration order.
    """
    sections: list[str] = []
    for field_name, field_info in type(model).model_fields.items():
        meta = _md_meta(field_info)
        if meta is None or isinstance(meta, MdSkip):
            continue
        value = getattr(model, field_name)
        if _should_skip(value, meta):
            continue
        header = _header_for(meta, field_info)
        sections.append(_render_section(value, meta, header))
    return '\n\n'.join(sections)
66
+
67
+
68
def apply_frontmatter(model: BaseModel, target: dict[str, Any]) -> None:
    """Write MdFrontmatter-annotated field values into `target`.

    Values come from model.model_dump(); each MdFrontmatter annotation on
    a field writes the (optionally transformed) value under its key, so
    one field can populate several keys. Mutates `target` in place.
    """
    dumped = model.model_dump()
    for field_name, field_info in type(model).model_fields.items():
        value = dumped.get(field_name)
        annotations = (m for m in field_info.metadata
                       if isinstance(m, MdFrontmatter))
        for annotation in annotations:
            if annotation.transform:
                target[annotation.key] = annotation.transform(value)
            else:
                target[annotation.key] = value
76
+
77
+
78
def _md_meta(fi: FieldInfo) -> MdSection | MdSkip | None:
    """Return the first markdown annotation on the field, or None."""
    return next(
        (m for m in fi.metadata if isinstance(m, (MdSection, MdSkip))),
        None,
    )
83
+
84
+
85
+ def _header_for(meta: MdSection, fi: FieldInfo) -> str | None:
86
+ return meta.header or fi.serialization_alias
87
+
88
+
89
+ _EMPTY_SENTINELS = frozenset({'unknown', 'none'})
90
+
91
+
92
+ def _should_skip(value: Any, meta: MdSection) -> bool:
93
+ if value is None:
94
+ return True
95
+ if isinstance(value, str) and not value.strip():
96
+ return True
97
+ if isinstance(value, (list, tuple)) and not value:
98
+ return True
99
+ if meta.skip_empty and isinstance(value, str):
100
+ if value.strip().lower() in _EMPTY_SENTINELS:
101
+ return True
102
+ return False
103
+
104
+
105
+ def _render_section(value: Any, meta: MdSection, header: str | None) -> str:
106
+ parts: list[str] = []
107
+ if header:
108
+ parts.append('#' * meta.depth + ' ' + header)
109
+ if meta.style == 'paragraph':
110
+ parts.append(str(value))
111
+ elif meta.style == 'bullets':
112
+ parts.append('\n'.join(f'- {item}' for item in value))
113
+ elif meta.style == 'numbered':
114
+ parts.append('\n'.join(f'{i + 1}. {item}' for i, item in enumerate(value)))
115
+ return '\n\n'.join(parts)