omlish 0.0.0.dev493__py3-none-any.whl → 0.0.0.dev506__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of omlish might be problematic. Click here for more details.
- omlish/CODESTYLE.md +345 -0
- omlish/README.md +2 -2
- omlish/__about__.py +6 -4
- omlish/_check.cc +209 -0
- omlish/check.py +11 -0
- omlish/dataclasses/__init__.py +4 -0
- omlish/dataclasses/impl/concerns/frozen.py +4 -1
- omlish/dataclasses/tools/replace.py +27 -0
- omlish/dispatch/functions.py +1 -1
- omlish/formats/json/stream/lexing.py +13 -5
- omlish/formats/json/stream/parsing.py +1 -1
- omlish/inject/README.md +430 -0
- omlish/inject/__init__.py +1 -0
- omlish/inject/_dataclasses.py +64 -64
- omlish/inject/eagers.py +0 -4
- omlish/inject/elements.py +4 -0
- omlish/inject/helpers/late.py +1 -1
- omlish/inject/helpers/managed.py +27 -24
- omlish/inject/impl/injector.py +7 -22
- omlish/inject/impl/inspect.py +0 -8
- omlish/inject/impl/origins.py +1 -0
- omlish/inject/impl/privates.py +2 -6
- omlish/inject/impl/providers.py +0 -4
- omlish/inject/impl/scopes.py +14 -18
- omlish/inject/inspect.py +9 -0
- omlish/inject/multis.py +0 -3
- omlish/inject/scopes.py +7 -5
- omlish/io/buffers.py +35 -8
- omlish/lang/__init__.py +8 -0
- omlish/lang/classes/simple.py +2 -1
- omlish/lang/iterables.py +6 -0
- omlish/lang/objects.py +13 -0
- omlish/lang/outcomes.py +1 -1
- omlish/lang/recursion.py +1 -1
- omlish/lang/sequences.py +33 -0
- omlish/lifecycles/_dataclasses.py +18 -18
- omlish/lifecycles/injection.py +4 -4
- omlish/lite/maybes.py +7 -0
- omlish/lite/typing.py +15 -0
- omlish/logs/all.py +11 -0
- omlish/logs/base.py +3 -3
- omlish/logs/bisync.py +99 -0
- omlish/marshal/_dataclasses.py +32 -32
- omlish/specs/jmespath/_dataclasses.py +38 -38
- omlish/specs/jsonschema/keywords/_dataclasses.py +24 -24
- omlish/typedvalues/_collection.cc +500 -0
- omlish/typedvalues/collection.py +159 -62
- omlish/typedvalues/generic.py +5 -4
- omlish/typedvalues/values.py +6 -0
- {omlish-0.0.0.dev493.dist-info → omlish-0.0.0.dev506.dist-info}/METADATA +9 -7
- {omlish-0.0.0.dev493.dist-info → omlish-0.0.0.dev506.dist-info}/RECORD +55 -50
- {omlish-0.0.0.dev493.dist-info → omlish-0.0.0.dev506.dist-info}/WHEEL +0 -0
- {omlish-0.0.0.dev493.dist-info → omlish-0.0.0.dev506.dist-info}/entry_points.txt +0 -0
- {omlish-0.0.0.dev493.dist-info → omlish-0.0.0.dev506.dist-info}/licenses/LICENSE +0 -0
- {omlish-0.0.0.dev493.dist-info → omlish-0.0.0.dev506.dist-info}/top_level.txt +0 -0
omlish/CODESTYLE.md
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
### Environment
|
|
2
|
+
|
|
3
|
+
- Target cpython 3.13 - use the modern language and library features it includes.
|
|
4
|
+
- \[**lite**\] The exception is 'lite' code, which targets python 3.8.
|
|
5
|
+
- **A module is declared as being lite by having a `# @omlish-lite` comment at the top of it, or at the top of any
|
|
6
|
+
`__init__` module in its or any ancestor's package.**
|
|
7
|
+
- As a reminder, non-\[**lite**\] code is referred to as 'standard' code.
|
|
8
|
+
- Code should run on modern macOS and Linux - Windows support is not necessary, but still prefer things like
|
|
9
|
+
`os.path.join` over `'/'.join` where reasonable.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
### Dependencies
|
|
13
|
+
|
|
14
|
+
- Outside of a few specific subpackages (and test code), there are no external dependencies of any kind to rely on.
|
|
15
|
+
Use the standard library liberally, use `omlish` for everything else.
|
|
16
|
+
|
|
17
|
+
- All external runtime dependencies are optional, and generally fit into the following categories:
|
|
18
|
+
- Cryptography: `cryptography`. Its use is optional.
|
|
19
|
+
- File formats: `orjson`, `pyyaml`, `cbor2`, `lxml`, `cloudpickle`, etc. Wherever possible, they serve only as
|
|
20
|
+
accelerators and their absence will not block functionality - code should strive to have internal fallbacks that
|
|
21
|
+
prefer correctness to speed.
|
|
22
|
+
- Text formats: `jinja2`, `markdown-it-py`, etc. Particularly in LLM stuff the full versions of these are
|
|
23
|
+
unavoidable, but simplified internal 'equivalents' exist for simpler usecases.
|
|
24
|
+
- Compression algorithms: `lz4`, `zstandard`, `python-snappy`, `brotli`, etc. These generally have no internal
|
|
25
|
+
fallback if not present in the standard library, but are usually optional at runtime in practice.
|
|
26
|
+
- Database drivers: `pg8000`, `psycopg`, `psycopg2`, `mysql-connector-python`, `mysqlclient`, `pymysql`,
|
|
27
|
+
`snowflake-connector-python`, `duckdb`, etc. These also generally have no internal fallback.
|
|
28
|
+
- Large math libraries: `numpy`, `torch`, `mlx`, `tinygrad`, `transformers`,
|
|
29
|
+
`llama-cpp-python`, `tokenizers`, etc. These tend to have gigantic interface surface areas compared to the
|
|
30
|
+
previous categories, and all interaction with them is heavily quarantined to a single isolated package per dependency.
|
|
31
|
+
Outside of these isolated packages they absolutely cannot be assumed to be present.
|
|
32
|
+
- `textual`, specifically - it is the sole chosen TUI library. Almost all terminal functionality throughout the
|
|
33
|
+
codebase is usable without it - it only powers a small number of specific, larger TUI apps.
|
|
34
|
+
- Async backends: `trio`, `anyio`, `trio-asyncio`. Except in specific situations (such as under `textual`) async
|
|
35
|
+
code is not assumed to be running under asyncio, and in general async code should use anyio.
|
|
36
|
+
- **NOTE:** this is in flux.
|
|
37
|
+
- The 'hyper stack' for production web serving: `h11`, `h2`, `wsproto`. There are simpler internal http servers for
|
|
38
|
+
local and development use.
|
|
39
|
+
- Unique, focused, irreproducible, core utility libraries: `executing` / `asttokens`, `greenlet`, `wrapt`. In
|
|
40
|
+
general 'core' utility libraries are either avoided, replaced with an equivalent internal implementation, or in
|
|
41
|
+
small cases (often for \[**lite**\] code) vendored (preserving licenses, copyrights, and attribution) such as in
|
|
42
|
+
parts of `omdev.packaging`. However, there is a small number of libraries that do things I have absolutely no
|
|
43
|
+
interest in attempting or maintaining myself, such as those listed here. As with all other deps these are strictly
|
|
44
|
+
optional, and fallbacks exist wherever possible.
|
|
45
|
+
- `httpx` specifically. It is **NOT** required - various internal async http client options exist. It is however an
|
|
46
|
+
optional integration.
|
|
47
|
+
- Various other optional backends: `psutil`, `mwparserfromhell`, `regex`, `ddgs`, `tree-sitter`, etc.
|
|
48
|
+
- Notably absent from this list:
|
|
49
|
+
- `pydantic`. Use dataclasses.
|
|
50
|
+
- `click`. Use argparse.
|
|
51
|
+
- Any 'web client' library: `boto3`, `google-api-python-client`, `openai`, `anthropic`, etc. These are not used,
|
|
52
|
+
even optionally, in the codebase. All interaction with such api's is done with internal clients, usually via
|
|
53
|
+
dataclasses.
|
|
54
|
+
- Note: references to boto exist in code but only for code generation and cross-validation testing. Boto is not
|
|
55
|
+
used for production aws interaction.
|
|
56
|
+
- `gitpython`, `docker`, etc. Drive their cli's through a subprocess or talk to the api through the socket.
|
|
57
|
+
- `rich` (outside of `textual`). Use `omlish.term`, or simple inline escape codes, or just output plain text.
|
|
58
|
+
- `loguru` / `logbook` / `structlog`. Use `omlish.logs` or just stdlib `logging`.
|
|
59
|
+
- `json5`. Use `omlish.formats.json5`.
|
|
60
|
+
- Various specs: `jsonrpc`, `jsonschema`, `openapi`, `mcp`. Internal implementations exist.
|
|
61
|
+
- Web frameworks: `flask`, `fastapi`, `starlette`, etc. Equivalent internal patterns exist.
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
### Structure
|
|
65
|
+
|
|
66
|
+
- In general, strongly prefer clusters of small source files to a single or small number of large ones - a few hundred
|
|
67
|
+
lines of code is a good target maximum, and there is no minimum.
|
|
68
|
+
- Our code is 'type-heavy', and small modules defining nothing but interrelated stateless structures are welcome.
|
|
69
|
+
- While not necessarily the case, organize packages as if they were to be managed by a guice-style dependency injector,
|
|
70
|
+
with package-level injector modules that each refer to their child packages' injector modules and so on.
|
|
71
|
+
- With the exception of root packages (`om*` directories), (relatively) deep package nesting is preferred over large
|
|
72
|
+
flat packages with many, less tightly related modules.
|
|
73
|
+
- Structurally quarantine all interaction with any external dependencies.
|
|
74
|
+
- For small, simple integrations, it may be done within a single module as an optional implementation of an
|
|
75
|
+
interface (or simple function conforming to a signature)
|
|
76
|
+
- For intermediate integrations, prefer a separate module dedicated to that integration.
|
|
77
|
+
- For large, complex integrations, prefer a more root-level package for that integration, whose internal structure
|
|
78
|
+
mirrors that of the core code it's being integrated with.
|
|
79
|
+
- In general the structure should mirror a 'Clean' or 'Hexagonal' architecture:
|
|
80
|
+
> Source code dependencies can only point inwards. Nothing in an inner circle can know anything at all about
|
|
81
|
+
something in an outer circle.
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
### Naming
|
|
85
|
+
|
|
86
|
+
- Module names should be nouns (usually plural or gerunds), not verbs, so as to not clash with function names. A
|
|
87
|
+
module should be named `parsing.py`, not `parse.py`, so `__init__.py` could `from .parsing import parse` without
|
|
88
|
+
shadowing the module itself.
|
|
89
|
+
- Function names should be verbs.
|
|
90
|
+
- When naming interface classes, the interface should be the 'bare' name, and implementations should have prefixes and
|
|
91
|
+
suffixes. For example, a user service interface would be `UserService`, with a `DbUserService` or `DictUserService`
|
|
92
|
+
subclass, or even a `UserServiceImpl` subclass if there is only one sensible initial implementation but it still
|
|
93
|
+
justifies being abstracted.
|
|
94
|
+
- When using acronyms, only the first letter of the acronym should be uppercased when it appears in CamelCased names
|
|
95
|
+
so as to distinguish it from adjacent acronyms. For example, a class to parse ABNF grammars would be `AbnfParser`,
|
|
96
|
+
and a class to parse the JSON ABNF grammar would be `JsonAbnfParser`.
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
### Imports
|
|
100
|
+
|
|
101
|
+
- **Always** use relative imports within a package. **Never** reference the name of the root package from within
|
|
102
|
+
itself. For example, within the `omlish` package, it's `from . import lang`, not `from omlish import lang`. Within
|
|
103
|
+
the `omlish` root package there should never be an import line containing the word `omlish` - and references to the
|
|
104
|
+
root name should be avoided in general.
|
|
105
|
+
- Use the following import aliases for the following modules if they are used:
|
|
106
|
+
- `import dataclasses as dc`
|
|
107
|
+
- `import typing as ta`
|
|
108
|
+
- For *package-internal* imports, almost always import specific items rather than whole modules or packages.
|
|
109
|
+
- Here, 'package-internal' is loosely defined as the layer which 'external' users of the code will treat as the
|
|
110
|
+
'public' interface.
|
|
111
|
+
- For other imports, strongly prefer to import a module or package, rather than importing specific items from it. So
|
|
112
|
+
for example, use `import typing as ta; fn: ta.Callable ...` as opposed to
|
|
113
|
+
`from typing import Callable; fn: Callable ...`, and `import dataclasses as dc; @dc.dataclass() ...` as opposed to
|
|
114
|
+
`from dataclasses import dataclass; @dataclass() ...`.
|
|
115
|
+
- Unless instructed or unavoidable, prefer to use only the standard library and the current existing codebase.
|
|
116
|
+
- Notable exceptions include:
|
|
117
|
+
- anyio - In general async code should write to anyio rather than asyncio (or trio) unless it is specifically
|
|
118
|
+
being written for a particular backend.
|
|
119
|
+
- **NOTE:** this is in flux.
|
|
120
|
+
- pytest - Write tests in pytest-style, and assume it is available.
|
|
121
|
+
- \[**lite**\] 'lite' code can have no external dependencies of any kind, and can only reference other 'lite' code.
|
|
122
|
+
- Lite async code uses only asyncio, and only uses functionality available in python 3.8.
|
|
123
|
+
- Lite tests are written with the unittest package.
|
|
124
|
+
- Unless forced to for external interoperability, avoid `pathlib` - use `os.path` instead.
|
|
125
|
+
- For heavy or optional imports, **ALWAYS** import whole modules, **not** individual module contents. For example, use
|
|
126
|
+
`import torch; t = torch.Tensor(...` rather than `from torch import Tensor; t = Tensor(...`.
|
|
127
|
+
- Rationale: we have a lazy import mechanism that operates at the module level. Do not attempt to manually
|
|
128
|
+
late-import such libraries, just import them as regular modules.
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
### Modules
|
|
132
|
+
|
|
133
|
+
- Avoid global state in general. Constants are however fine.
|
|
134
|
+
- Do basically no 'work' in module body:
|
|
135
|
+
- *NEVER* eagerly do any IO in module body - wrap any such things in a `@lang.cached_function`.
|
|
136
|
+
- *NEVER* write dumb python 'scripts'. Modules intended as entrypoints are great, but *ALWAYS* make them importable
|
|
137
|
+
side-effect-free, and *ALWAYS* have an `if __name__ == '__main__'` guard at the bottom. And almost always have
|
|
138
|
+
that just call a `def _main() -> None:` or `async def _a_main() -> None:`. Don't pollute module globals with state
|
|
139
|
+
even if the module is running as entrypoint.
|
|
140
|
+
- Avoid temporary values in module global scope - for example, construction of a global effectively const `Mapping`
|
|
141
|
+
via a comprehension is fine in module body (as comprehension variables do not leak out to parent scope), but a bare
|
|
142
|
+
for loop in module body is not okay (as the loop variables and any intermediates in the loop body will be left as
|
|
143
|
+
globals). Instead, prefer to define and call a module private function which returns the desired global value.
|
|
144
|
+
- Always use relative imports even in python modules intended to be directly executed. All python invocations will
|
|
145
|
+
always be done via `python -m`.
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
### Classes
|
|
149
|
+
|
|
150
|
+
- Ensure constructors call `super().__init__()`, even if they don't appear to inherit from anything at their
|
|
151
|
+
definition - *except* if the class is `@ta.Final` and there is explicit reason to not.
|
|
152
|
+
- A blank line should follow the super call if it is the first statement of the method (which it usually is) and
|
|
153
|
+
there are more statements in the method.
|
|
154
|
+
- Prefer to use dataclasses for even moderately complex usecases - if there are, say, more than a 2-element tuple, a
|
|
155
|
+
dataclass should probably be used.
|
|
156
|
+
- `ta.NamedTuple` still has limited usecases, such as replacing a function's return type from an anonymous tuple to a
|
|
157
|
+
named one (to allow it to be destructured by callers as before), or as a cache key, but in general almost always
|
|
158
|
+
prefer dataclasses.
|
|
159
|
+
- If a number of related functions are passing around a growing number of the common args/kwargs, don't be shy about
|
|
160
|
+
refactoring them into methods on a common class with shared immutable and mutable state - if the class is considered
|
|
161
|
+
a private implementation detail and not part of any public api.
|
|
162
|
+
- For any necessary global state involving multiple interrelated variables, consider encapsulating it in a class, even
|
|
163
|
+
if it's only a private singleton.
|
|
164
|
+
- If appropriate, lean towards stateless classes, taking dependencies as constructor arguments and not mutating them
|
|
165
|
+
once set, and passing around and returning dataclasses for intermediate data. If however significant mutable shared
|
|
166
|
+
state is involved just use regular private class fields.
|
|
167
|
+
- Such dependencies should not be exposed publicly for other code to lazily piggyback off of: avoid transitive
|
|
168
|
+
dependencies.
|
|
169
|
+
- While not necessarily the case, write code as if it were to be managed by a constructor-injecting guice-style
|
|
170
|
+
dependency injector.
|
|
171
|
+
- Ideally, classes have their functional dependencies as ctor kwargs, and if possible these have sensible defaults.
|
|
172
|
+
- In such cases, prefer to have a kwarg default value for primitives and immutable values, otherwise prefer an
|
|
173
|
+
` | None = None` kwarg and instantiate the default value in the ctor if necessary.
|
|
174
|
+
- Strongly prefer composition over inheritance.
|
|
175
|
+
- Prefer relatively fine-grained dependency decomposition.
|
|
176
|
+
- **STRONGLY** avoid giant, monolithic classes containing _eVeRyThInG_ anyone will ever need. Avoid classes like
|
|
177
|
+
`AppContext` and `AppConfig` each having a large number of fields, *even if* those fields are arbitrarily deeply
|
|
178
|
+
nested within otherwise well-typed child objects.
|
|
179
|
+
- For situations in which different behaviors are necessary, prefer to define an interface or abstract class with
|
|
180
|
+
`@abc.abstractmethod` members, and write multiple implementations of them as warranted. Prefer to refer to the
|
|
181
|
+
interface in type annotations unless it must specifically refer to a given implementation.
|
|
182
|
+
- Protocols are to be used sparingly where they make sense. In general, nominal typing is a good thing - it is
|
|
183
|
+
desirable that not all functions that return an `int` are `UserIdProvider`'s.
|
|
184
|
+
- An abstract class with nothing but abstract (or constant) members is referred to as an interface. In general, prefer
|
|
185
|
+
pure interfaces as opposed to full abstract classes containing partial implementations at package public interface
|
|
186
|
+
boundaries.
|
|
187
|
+
- Do not use `abc.ABC` - in standard code use `lang.Abstract` and in lite code use `omlish.lite.abstract.Abstract`.
|
|
188
|
+
- Rationale: `abc.ABCMeta` adds extreme overhead to `isinstance` / `issubclass` checks (6x) in order to support
|
|
189
|
+
virtual base classes, which are almost never needed or desirable.
|
|
190
|
+
- Abstract methods should always do nothing but `raise NotImplementedError` - but they *must* do that.
|
|
191
|
+
- Properties should be free of side-effects.
|
|
192
|
+
- Rationale: Many utilities eagerly inspect properties at runtime, even private (underscore-prefixed) ones, so they
|
|
193
|
+
cannot alter state.
|
|
194
|
+
- Outside of rare, specific instances, **DO NOT** expose mutable internal class state.
|
|
195
|
+
- Keep all mutable state private as single-underscore-prefixed fields.
|
|
196
|
+
- As necessary for usage, expose internal state via methods or `@property`'s. For such cases do one of the
|
|
197
|
+
following:
|
|
198
|
+
- Type-annotate the return type as immutable. For example, a property exposing an internal `list[int]` would be
|
|
199
|
+
annotated as returning a `ta.Sequence[int]`, and a `dict[int, str]` would be annotated as returning a
|
|
200
|
+
`ta.Mapping[int, str]`.
|
|
201
|
+
- Return a defensive copy of the internal state. For example, a property returning an internal `list[int]` would
|
|
202
|
+
return a copy of the internal list.
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
### Dataclasses
|
|
206
|
+
|
|
207
|
+
- Do not use bare, un-called `@dc.dataclass` as a decorator - always use `@dc.dataclass()` even if it is given no
|
|
208
|
+
arguments.
|
|
209
|
+
- **Strongly** prefer frozen dataclasses.
|
|
210
|
+
- In standard code, prefer to `from omlish import dataclasses as dc` - not the standard library `dataclasses` module.
|
|
211
|
+
The interface and behavior are the same.
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
### Exceptions
|
|
215
|
+
|
|
216
|
+
- **Never** use the `assert` statement anywhere but test code - rather, check a condition and raise an `Exception` if
|
|
217
|
+
necessary.
|
|
218
|
+
- Prefer to use the 'check' system (`from omlish import check`, or `from omlish.lite.check import check` for lite
|
|
219
|
+
code) where `assert` would otherwise be used.
|
|
220
|
+
- Outside of `TypeError`, `ValueError`, and `RuntimeError`, prefer to create custom subclasses of `Exception` for more
|
|
221
|
+
specific errors. Use inheritance where beneficial to communicate subtypes of errors.
|
|
222
|
+
- `KeyError` should however not be raised except in the specific and rare case of implementing a `ta.Mapping` or
|
|
223
|
+
direct equivalent. For example, a `UserService` `get_user` method should raise a `UserNotFoundError`, not
|
|
224
|
+
`KeyError`, when a given user is not found.
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
### Type Annotation
|
|
228
|
+
|
|
229
|
+
- Type annotate functions and class fields wherever possible, even if it is simply `ta.Any`, but use the most specific
|
|
230
|
+
annotation feasible.
|
|
231
|
+
- Lack of type annotation is an explicit choice communicating that that particular code cannot or should not be
|
|
232
|
+
statically typed (usually because it is particularly dynamic).
|
|
233
|
+
- Almost always, if one class field or function parameter is annotated, all fields / parameters / return values
|
|
234
|
+
should be.
|
|
235
|
+
- Return value annotations should be included on most magic methods like `__init__` and `__hash__`, but trickier
|
|
236
|
+
ones like `__exit__` and `__eq__` may be omitted.
|
|
237
|
+
- An exception to this is test code - in general don't bother type annotating test code, and in fact avoid test
|
|
238
|
+
function parameter annotations due to the dynamic nature of pytest fixtures.
|
|
239
|
+
- Use PEP-585 style annotations for builtin types - use `list[int]` instead of `ta.List[int]`, and `int | None`
|
|
240
|
+
instead of `ta.Optional[int]`.
|
|
241
|
+
- Use `typing` aliases for non-builtin types - use `ta.Sequence[int]` instead of `collections.abc.Sequence[int]`.
|
|
242
|
+
- Prefer to accept immutable, less-specific types - a function should likely use a `ta.Sequence[int]` parameter rather
|
|
243
|
+
than a `list[int]`. Use `ta.AbstractSet` over `set` and `frozenset`, and use `ta.Mapping` over `dict`, accordingly.
|
|
244
|
+
- When returning values, prefer to use the full type if the caller 'owns' the value, and use a less-specific, usually
|
|
245
|
+
immutable type when the caller does not. For example, a utility function filtering out odd numbers from a
|
|
246
|
+
`ta.Iterable[int]` can return a new `list[int]`, but a getter property on a class exposing some internal set of
|
|
247
|
+
integers should probably return a `ta.AbstractSet[int]` rather than a `set[int]`.
|
|
248
|
+
- Don't avoid `ta.Generic` and type parameters where it makes sense, but usually annotating something as a superclass
|
|
249
|
+
will suffice. When present in a class definition, `ta.Generic` should be the last class in the base class list.
|
|
250
|
+
- Do **NOT** use PEP-695 style type parameter syntax yet:
|
|
251
|
+
- Continue to declare `ta.TypeVar`'s explicitly at the top of the module.
|
|
252
|
+
- Continue to declare type aliases as global variables (whose own type is annotated as `ta.TypeAlias`). For example,
|
|
253
|
+
do `IntList: ta.TypeAlias = list[int]`, not `type IntList = list[int]`.
|
|
254
|
+
- Note that in \[**lite**\] code, there is no `ta.TypeAlias` yet (as it was added in 3.10). In lite code, suffix
|
|
255
|
+
the line with `# ta.TypeAlias`. Additionally, type aliases in lite code **must be kept on a single line**. This
|
|
256
|
+
restriction does not apply to standard code.
|
|
257
|
+
- Rationale: lite code is written to be 'amalgamated' - stitched together into a single python file - in which
|
|
258
|
+
case type aliases are relocated to the top of the file **and globally deduplicated**. As such each line of type
|
|
259
|
+
alias must be self-contained.
|
|
260
|
+
- Rationale: the `type` statement produces radically different and incompatible reflective behavior at runtime, and
|
|
261
|
+
in general tools still struggle with the new syntax.
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
### Comments
|
|
265
|
+
|
|
266
|
+
- Avoid unnecessary and frivolous comments. Most semantic meaning should be able to be inferred from package / module
|
|
267
|
+
/ type / method / function / parameter names and annotations. For example a function like
|
|
268
|
+
`def add_two_floats(x: float, y: float) -> float:` does not need a docstring or comments.
|
|
269
|
+
- Do not repeat typing information in function docstrings. In general function and parameter names and types should be
|
|
270
|
+
clear enough to not require explicitly listing them in docstrings. Do not use google-style or equivalent docstrings.
|
|
271
|
+
- Both opening and closing docstring triple-quotes should be alone on their own dedicated line *unless* the entire
|
|
272
|
+
docstring (including triple-quotes and indentation) fits on a single 120-column wide line.
|
|
273
|
+
- All docstrings should be followed by a blank line.
|
|
274
|
+
- Reserve inline comments for 'surprising' or dangerous things, such as invariants which must be maintained. A comment
|
|
275
|
+
like `self._ensure_user_exists() # ensure user exists` is worthless, but a comment like
|
|
276
|
+
`self._ensure_user_exists() # safe because we already hold the user lock` is valuable.
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
### Documentation
|
|
280
|
+
|
|
281
|
+
- Documentation should be written in markdown, and should have a general maximum line width of 120 characters.
|
|
282
|
+
- Substantial packages should have a `README.md` file at the root of the package directory outlining the package's
|
|
283
|
+
purpose, usage, and high level architecture. These files are automatically included in distributions as resources
|
|
284
|
+
for end-users.
|
|
285
|
+
- This is however a work in progress ☺️.
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
### Tests
|
|
289
|
+
|
|
290
|
+
- As above, write tests in pytest-style.
|
|
291
|
+
- Use raw assertions liberally in tests, and use pytest utilities like `pytest.raises`.
|
|
292
|
+
- Use fixtures and other advanced pytest features sparingly. Prefer to simply instantiate test data rather than wrap
|
|
293
|
+
it in a fixture.
|
|
294
|
+
- In \[**lite**\] test code the `unittest` package must be used instead of `pytest` because lite tests are run in
|
|
295
|
+
venvs with no external dependencies present.
|
|
296
|
+
- Be sure async tests are put in an `IsolatedAsyncioTestCase` subclass.
|
|
297
|
+
- It is equally fine to use both bare `assert` statements and `unittest` assert helpers like `assertCountEqual`.
|
|
298
|
+
- In general, prefer to write tests in a way that they can be run in parallel.
|
|
299
|
+
- Avoid mocks - prefer to structure code such that a 'simple' but still functioning implementation of an interface can
|
|
300
|
+
be used where a mock would otherwise be. For example, for some `UserService` interface with an `add_user` method, for
|
|
301
|
+
which a `RemoteUserService` would usually make a remote service call, prefer to implement a `DictUserService` class
|
|
302
|
+
with an `add_user` method such that it actually stores the added user in a dictionary on the instance.
|
|
303
|
+
- In practice these are usually useful to have outside of test code as default implementations anyway!
|
|
304
|
+
- These are often called 'fakes' but the term is avoided to emphasize their general non-test utility.
|
|
305
|
+
- Strongly avoid monkeypatching anything. Ideally code should be structured to allow more graceful means of
|
|
306
|
+
instrumentation and fault injection (e.g. via alternative interface implementations).
|
|
307
|
+
- Occasional unavoidable exceptions exist, such as being forced to patch an external dep, or when doing fault
|
|
308
|
+
injection that's too fine-grained to justify interface decomposition.
|
|
309
|
+
- An ideal to aim for is a test suite reproducing all realistic (or encountered) failures at each individual IO and
|
|
310
|
+
synchronization point.
|
|
311
|
+
- With multiple concurrent actors this may be achieved through 'lock-step' execution: with for example 2 related
|
|
312
|
+
actors running concurrently which encounter a shared point of synchronization, run a test twice, once with the
|
|
313
|
+
first actor running first, and once with the second actor running first.
|
|
314
|
+
- Do **not** use 'sleep' to simulate lock-step execution, timeouts, or other test conditions. Tests should strive to
|
|
315
|
+
deterministically complete as quickly as possible via explicit synchronization.
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
### Runtime
|
|
319
|
+
|
|
320
|
+
- Unless forced to through interaction with external code, do not use environment variables for anything.
|
|
321
|
+
Configuration should be injected, usually as keyword-only class constructor arguments, and usually in the form of
|
|
322
|
+
dataclasses or `ta.NewType`s.
|
|
323
|
+
- Outside of test code, *NEVER* use `__file__` - never assume python code is running in `.py` files on a filesystem.
|
|
324
|
+
(In general, do not even assume to have a readable filesystem). Write code compatible with zipfile python dists and
|
|
325
|
+
pyoxidizer in which there is no `__file__`. Access resources via `lang.get_package_resources` /
|
|
326
|
+
`lang.get_relative_resources`.
|
|
327
|
+
- In general, with very rare exception, 'everything (that does IO) needs a timeout', but the default may be large
|
|
328
|
+
enough to never be realistically hit in practice (think 5m for interactive work, 1h for background work).
|
|
329
|
+
- By default all pytests already run with a standard timeout.
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
### C Extensions
|
|
333
|
+
|
|
334
|
+
- C extensions use C11 and C++ extensions use C++20.
|
|
335
|
+
- In general prefer to write native extensions in C++.
|
|
336
|
+
- Use the C++ standard library liberally, but not 'excessively' lol. Write more 'C-style' code when interfacing with
|
|
337
|
+
CPython.
|
|
338
|
+
- C/C++ extensions should have `// @omlish-cext` as their first line, and will thereafter be automatically built and
|
|
339
|
+
packaged by existing codebase machinery.
|
|
340
|
+
- C/C++ extensions should be kept to a single, self-contained source file - do not write new header files.
|
|
341
|
+
- C++ source files use the `.cc` extension, and C++ header files use the `.hh` extension.
|
|
342
|
+
- Native extensions *must* use PEP-489 style multi-phase extension initialization (`PyModuleDef_Init`).
|
|
343
|
+
- Modules should mark themselves `Py_MOD_GIL_NOT_USED` and `Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED` as applicable.
|
|
344
|
+
Modules should strive to be written to support both if at all possible.
|
|
345
|
+
- See `omdev/cexts/_boilerplate.cc` for a simple C++ extension template.
|
omlish/README.md
CHANGED
|
@@ -139,8 +139,8 @@ dependencies of any kind**.
|
|
|
139
139
|
- **[plugins](https://github.com/wrmsr/omlish/blob/master/omlish/testing/pytest/plugins)** - Various other plugins.
|
|
140
140
|
|
|
141
141
|
- **[typedvalues](https://github.com/wrmsr/omlish/blob/master/omlish/typedvalues)** - A little toolkit around 'boxed'
|
|
142
|
-
values, whose 'box' types convey more information than the bare values themselves. A rebellion against kwargs
|
|
143
|
-
vars: instead of `foo(bar=1, baz=2)`, you do `foo(Bar(1), Baz(2))`.
|
|
142
|
+
values, whose 'box' types convey more information than the bare values themselves. A rebellion against kwargs / env
|
|
143
|
+
vars / giant config objects: instead of `foo(bar=1, baz=2)`, you do `foo(Bar(1), Baz(2))`.
|
|
144
144
|
|
|
145
145
|
- **[lite](https://github.com/wrmsr/omlish/blob/master/omlish/lite)** - The standard library of 'lite' code. This is the
|
|
146
146
|
only package beneath `lang`, and parts of it are re-exported by it for deduplication. On top of miscellaneous
|
omlish/__about__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
__version__ = '0.0.0.dev493'
|
|
2
|
-
__revision__ = '
|
|
1
|
+
__version__ = '0.0.0.dev506'
|
|
2
|
+
__revision__ = '3d7c2a9a417108012b5cf11d20ed3e2eef483b6d'
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
#
|
|
@@ -60,7 +60,7 @@ class Project(ProjectBase):
|
|
|
60
60
|
'asttokens ~= 3.0',
|
|
61
61
|
'executing ~= 2.2',
|
|
62
62
|
|
|
63
|
-
'psutil ~= 7.
|
|
63
|
+
'psutil ~= 7.2',
|
|
64
64
|
],
|
|
65
65
|
|
|
66
66
|
'formats': [
|
|
@@ -69,7 +69,7 @@ class Project(ProjectBase):
|
|
|
69
69
|
|
|
70
70
|
'pyyaml ~= 6.0',
|
|
71
71
|
|
|
72
|
-
'cbor2 ~= 5.
|
|
72
|
+
'cbor2 ~= 5.8',
|
|
73
73
|
|
|
74
74
|
'cloudpickle ~= 3.1',
|
|
75
75
|
],
|
|
@@ -99,6 +99,8 @@ class Project(ProjectBase):
|
|
|
99
99
|
# 'mysql-connector-python ~= 9.5',
|
|
100
100
|
# 'mysqlclient ~= 2.2',
|
|
101
101
|
|
|
102
|
+
'snowflake-connector-python ~= 4.2',
|
|
103
|
+
|
|
102
104
|
'aiomysql ~= 0.3',
|
|
103
105
|
'aiosqlite ~= 0.22',
|
|
104
106
|
'asyncpg ~= 0.31',
|
omlish/_check.cc
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
// @omlish-cext
|
|
2
|
+
#define PY_SSIZE_T_CLEAN
|
|
3
|
+
#include "Python.h"
|
|
4
|
+
#include "structmember.h"
|
|
5
|
+
|
|
6
|
+
#include <vector>
|
|
7
|
+
|
|
8
|
+
//
|
|
9
|
+
|
|
10
|
+
#define _MODULE_NAME "_check"
|
|
11
|
+
#define _PACKAGE_NAME "omlish"
|
|
12
|
+
#define _MODULE_FULL_NAME _PACKAGE_NAME "." _MODULE_NAME
|
|
13
|
+
|
|
14
|
+
typedef struct check_state {
|
|
15
|
+
PyObject *typing_any;
|
|
16
|
+
PyTypeObject *nonetype;
|
|
17
|
+
} check_state;
|
|
18
|
+
|
|
19
|
+
static inline check_state * get_check_state(PyObject *module)
|
|
20
|
+
{
|
|
21
|
+
void *state = PyModule_GetState(module);
|
|
22
|
+
assert(state != NULL);
|
|
23
|
+
return (check_state *)state;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
//
|
|
27
|
+
|
|
28
|
+
PyDoc_STRVAR(unpack_isinstance_spec_doc, "unpack_isinstance_spec(spec)");
|
|
29
|
+
|
|
30
|
+
static PyObject * unpack_isinstance_spec(PyObject *module, PyObject *spec)
|
|
31
|
+
{
|
|
32
|
+
check_state *state = get_check_state(module);
|
|
33
|
+
|
|
34
|
+
// If spec is a type, return (spec,)
|
|
35
|
+
if (PyType_Check(spec)) {
|
|
36
|
+
return PyTuple_Pack(1, spec);
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
PyObject *tuple_spec = nullptr;
|
|
40
|
+
|
|
41
|
+
// If not a tuple, wrap it in a tuple
|
|
42
|
+
if (!PyTuple_Check(spec)) {
|
|
43
|
+
tuple_spec = PyTuple_Pack(1, spec);
|
|
44
|
+
if (tuple_spec == nullptr) {
|
|
45
|
+
return nullptr;
|
|
46
|
+
}
|
|
47
|
+
} else {
|
|
48
|
+
// It's already a tuple, so we'll work with it
|
|
49
|
+
tuple_spec = spec;
|
|
50
|
+
Py_INCREF(tuple_spec);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// Check if None is in spec
|
|
54
|
+
Py_ssize_t size = PyTuple_Size(tuple_spec);
|
|
55
|
+
if (size < 0) {
|
|
56
|
+
Py_DECREF(tuple_spec);
|
|
57
|
+
return nullptr;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
bool has_none = false;
|
|
61
|
+
bool has_any = false;
|
|
62
|
+
|
|
63
|
+
for (Py_ssize_t i = 0; i < size; i++) {
|
|
64
|
+
PyObject *item = PyTuple_GetItem(tuple_spec, i); // borrowed reference
|
|
65
|
+
if (item == nullptr) {
|
|
66
|
+
Py_DECREF(tuple_spec);
|
|
67
|
+
return nullptr;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
if (item == Py_None) {
|
|
71
|
+
has_none = true;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// Check if item is typing.Any
|
|
75
|
+
if (state->typing_any != nullptr) {
|
|
76
|
+
int cmp = PyObject_RichCompareBool(item, state->typing_any, Py_EQ);
|
|
77
|
+
if (cmp < 0) {
|
|
78
|
+
Py_DECREF(tuple_spec);
|
|
79
|
+
return nullptr;
|
|
80
|
+
}
|
|
81
|
+
if (cmp) {
|
|
82
|
+
has_any = true;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// If typing.Any is in spec, return (object,)
|
|
88
|
+
if (has_any) {
|
|
89
|
+
Py_DECREF(tuple_spec);
|
|
90
|
+
return PyTuple_Pack(1, &PyBaseObject_Type);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// If None is in spec, filter it out and add NoneType
|
|
94
|
+
if (has_none) {
|
|
95
|
+
std::vector<PyObject*> filtered;
|
|
96
|
+
filtered.reserve(size);
|
|
97
|
+
|
|
98
|
+
for (Py_ssize_t i = 0; i < size; i++) {
|
|
99
|
+
PyObject *item = PyTuple_GetItem(tuple_spec, i); // borrowed reference
|
|
100
|
+
if (item != Py_None) {
|
|
101
|
+
filtered.push_back(item);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Add NoneType
|
|
106
|
+
filtered.push_back((PyObject *)state->nonetype);
|
|
107
|
+
|
|
108
|
+
// Create new tuple
|
|
109
|
+
PyObject *result = PyTuple_New(filtered.size());
|
|
110
|
+
if (result == nullptr) {
|
|
111
|
+
Py_DECREF(tuple_spec);
|
|
112
|
+
return nullptr;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
for (size_t i = 0; i < filtered.size(); i++) {
|
|
116
|
+
Py_INCREF(filtered[i]);
|
|
117
|
+
PyTuple_SET_ITEM(result, i, filtered[i]);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
Py_DECREF(tuple_spec);
|
|
121
|
+
return result;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Return the tuple as-is
|
|
125
|
+
return tuple_spec;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
//
|
|
129
|
+
|
|
130
|
+
PyDoc_STRVAR(check_doc, "Native C++ implementations for omlish.lite.check");
|
|
131
|
+
|
|
132
|
+
static int check_exec(PyObject *module)
|
|
133
|
+
{
|
|
134
|
+
check_state *state = get_check_state(module);
|
|
135
|
+
|
|
136
|
+
// Get typing.Any
|
|
137
|
+
PyObject *typing_module = PyImport_ImportModule("typing");
|
|
138
|
+
if (typing_module == nullptr) {
|
|
139
|
+
// If typing module is not available, just set to nullptr
|
|
140
|
+
PyErr_Clear();
|
|
141
|
+
state->typing_any = nullptr;
|
|
142
|
+
} else {
|
|
143
|
+
state->typing_any = PyObject_GetAttrString(typing_module, "Any");
|
|
144
|
+
Py_DECREF(typing_module);
|
|
145
|
+
if (state->typing_any == nullptr) {
|
|
146
|
+
PyErr_Clear();
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// Get NoneType (type(None))
|
|
151
|
+
state->nonetype = Py_TYPE(Py_None);
|
|
152
|
+
Py_INCREF(state->nonetype);
|
|
153
|
+
|
|
154
|
+
return 0;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
static int check_traverse(PyObject *module, visitproc visit, void *arg)
|
|
158
|
+
{
|
|
159
|
+
check_state *state = get_check_state(module);
|
|
160
|
+
Py_VISIT(state->typing_any);
|
|
161
|
+
Py_VISIT(state->nonetype);
|
|
162
|
+
return 0;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
static int check_clear(PyObject *module)
|
|
166
|
+
{
|
|
167
|
+
check_state *state = get_check_state(module);
|
|
168
|
+
Py_CLEAR(state->typing_any);
|
|
169
|
+
Py_CLEAR(state->nonetype);
|
|
170
|
+
return 0;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
static void check_free(void *module)
|
|
174
|
+
{
|
|
175
|
+
check_clear((PyObject *)module);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
static PyMethodDef check_methods[] = {
|
|
179
|
+
{"unpack_isinstance_spec", (PyCFunction)unpack_isinstance_spec, METH_O, unpack_isinstance_spec_doc},
|
|
180
|
+
{NULL, NULL, 0, NULL}
|
|
181
|
+
};
|
|
182
|
+
|
|
183
|
+
static struct PyModuleDef_Slot check_slots[] = {
|
|
184
|
+
{Py_mod_exec, (void *) check_exec},
|
|
185
|
+
{Py_mod_gil, Py_MOD_GIL_NOT_USED},
|
|
186
|
+
{Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED},
|
|
187
|
+
{0, NULL}
|
|
188
|
+
};
|
|
189
|
+
|
|
190
|
+
static struct PyModuleDef check_module = {
|
|
191
|
+
.m_base = PyModuleDef_HEAD_INIT,
|
|
192
|
+
.m_name = _MODULE_NAME,
|
|
193
|
+
.m_doc = check_doc,
|
|
194
|
+
.m_size = sizeof(check_state),
|
|
195
|
+
.m_methods = check_methods,
|
|
196
|
+
.m_slots = check_slots,
|
|
197
|
+
.m_traverse = check_traverse,
|
|
198
|
+
.m_clear = check_clear,
|
|
199
|
+
.m_free = check_free,
|
|
200
|
+
};
|
|
201
|
+
|
|
202
|
+
extern "C" {
|
|
203
|
+
|
|
204
|
+
PyMODINIT_FUNC PyInit__check(void)
|
|
205
|
+
{
|
|
206
|
+
return PyModuleDef_Init(&check_module);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
}
|
omlish/check.py
CHANGED
|
@@ -26,6 +26,17 @@ _callable = callable
|
|
|
26
26
|
##
|
|
27
27
|
|
|
28
28
|
|
|
29
|
+
try:
|
|
30
|
+
from . import _check # type: ignore
|
|
31
|
+
except ImportError:
|
|
32
|
+
pass
|
|
33
|
+
else:
|
|
34
|
+
setattr(Checks, '_unpack_isinstance_spec', _check.unpack_isinstance_spec)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
##
|
|
38
|
+
|
|
39
|
+
|
|
29
40
|
def register_on_raise(fn: OnRaiseFn) -> None:
|
|
30
41
|
check.register_on_raise(fn)
|
|
31
42
|
|
omlish/dataclasses/__init__.py
CHANGED
|
@@ -79,6 +79,9 @@ class FrozenGenerator(Generator[FrozenPlan]):
|
|
|
79
79
|
if not ctx.cs.frozen:
|
|
80
80
|
return None
|
|
81
81
|
|
|
82
|
+
if issubclass(ctx.cls, BaseException):
|
|
83
|
+
raise TypeError('cannot use frozen=True with subclass of BaseException')
|
|
84
|
+
|
|
82
85
|
return PlanResult(FrozenPlan(
|
|
83
86
|
fields=tuple(f.name for f in ctx.cs.fields),
|
|
84
87
|
allow_dynamic_dunder_attrs=ctx.cs.allow_dynamic_dunder_attrs,
|
|
@@ -118,7 +121,7 @@ class FrozenGenerator(Generator[FrozenPlan]):
|
|
|
118
121
|
f'}}',
|
|
119
122
|
f'',
|
|
120
123
|
])
|
|
121
|
-
condition.append(f'
|
|
124
|
+
condition.append(f'or name in {set_ident}')
|
|
122
125
|
|
|
123
126
|
return AddMethodOp(
|
|
124
127
|
f'__{mth}__',
|