lairs 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lairs/__init__.py +142 -0
- lairs/_aturi.py +59 -0
- lairs/_codegen/__init__.py +30 -0
- lairs/_codegen/emit.py +450 -0
- lairs/_codegen/manifest.py +99 -0
- lairs/_codegen/pipeline.py +366 -0
- lairs/_codegen/schema_to_spec.py +627 -0
- lairs/_types.py +38 -0
- lairs/atproto/__init__.py +84 -0
- lairs/atproto/_car.py +107 -0
- lairs/atproto/appview.py +238 -0
- lairs/atproto/auth.py +383 -0
- lairs/atproto/blobs.py +250 -0
- lairs/atproto/firehose.py +374 -0
- lairs/atproto/identity.py +419 -0
- lairs/atproto/pds.py +873 -0
- lairs/author/__init__.py +60 -0
- lairs/author/builders.py +595 -0
- lairs/author/publish.py +1391 -0
- lairs/cli.py +1403 -0
- lairs/data/__init__.py +34 -0
- lairs/data/corpus.py +806 -0
- lairs/data/dataset.py +438 -0
- lairs/data/features.py +252 -0
- lairs/discovery/__init__.py +46 -0
- lairs/discovery/accelerator.py +137 -0
- lairs/discovery/actor.py +299 -0
- lairs/discovery/cards.py +335 -0
- lairs/discovery/federated.py +144 -0
- lairs/discovery/index.py +341 -0
- lairs/discovery/ingest.py +362 -0
- lairs/discovery/links.py +169 -0
- lairs/discovery/models.py +221 -0
- lairs/discovery/query.py +175 -0
- lairs/discovery/summary.py +283 -0
- lairs/integrations/__init__.py +43 -0
- lairs/integrations/codecs/__init__.py +82 -0
- lairs/integrations/codecs/brat.py +795 -0
- lairs/integrations/codecs/conllu.py +987 -0
- lairs/integrations/hf/__init__.py +41 -0
- lairs/integrations/hf/datasets.py +527 -0
- lairs/integrations/hf/hub.py +480 -0
- lairs/integrations/kb/__init__.py +78 -0
- lairs/integrations/kb/glazing.py +480 -0
- lairs/integrations/kb/reconciliation.py +475 -0
- lairs/integrations/kb/wikidata.py +590 -0
- lairs/integrations/ports.py +217 -0
- lairs/integrations/registry.py +279 -0
- lairs/integrations/tfdata.py +428 -0
- lairs/integrations/torch.py +497 -0
- lairs/integrations/tracking.py +297 -0
- lairs/integrations/webdataset.py +548 -0
- lairs/lexicons/MANIFEST.toml +18 -0
- lairs/lexicons/README.md +17 -0
- lairs/lexicons/pub/layers/alignment/alignment.json +130 -0
- lairs/lexicons/pub/layers/alignment/getAlignment.json +30 -0
- lairs/lexicons/pub/layers/alignment/listAlignments.json +43 -0
- lairs/lexicons/pub/layers/annotation/annotationLayer.json +239 -0
- lairs/lexicons/pub/layers/annotation/clusterSet.json +86 -0
- lairs/lexicons/pub/layers/annotation/defs.json +165 -0
- lairs/lexicons/pub/layers/annotation/getAnnotationLayer.json +30 -0
- lairs/lexicons/pub/layers/annotation/getClusterSet.json +30 -0
- lairs/lexicons/pub/layers/annotation/listAnnotationLayers.json +44 -0
- lairs/lexicons/pub/layers/annotation/listClusterSets.json +43 -0
- lairs/lexicons/pub/layers/authAnnotator.json +64 -0
- lairs/lexicons/pub/layers/authCorpusManager.json +64 -0
- lairs/lexicons/pub/layers/authExperimenter.json +52 -0
- lairs/lexicons/pub/layers/authFull.json +110 -0
- lairs/lexicons/pub/layers/authOntologyEditor.json +46 -0
- lairs/lexicons/pub/layers/authReadOnly.json +73 -0
- lairs/lexicons/pub/layers/changelog/defs.json +107 -0
- lairs/lexicons/pub/layers/changelog/entry.json +56 -0
- lairs/lexicons/pub/layers/changelog/getEntry.json +30 -0
- lairs/lexicons/pub/layers/changelog/listByCollection.json +45 -0
- lairs/lexicons/pub/layers/changelog/listEntries.json +46 -0
- lairs/lexicons/pub/layers/corpus/corpus.json +110 -0
- lairs/lexicons/pub/layers/corpus/defs.json +173 -0
- lairs/lexicons/pub/layers/corpus/getCorpus.json +30 -0
- lairs/lexicons/pub/layers/corpus/getMembership.json +30 -0
- lairs/lexicons/pub/layers/corpus/listCorpora.json +85 -0
- lairs/lexicons/pub/layers/corpus/listMemberships.json +43 -0
- lairs/lexicons/pub/layers/corpus/membership.json +55 -0
- lairs/lexicons/pub/layers/defs.json +972 -0
- lairs/lexicons/pub/layers/eprint/dataLink.json +90 -0
- lairs/lexicons/pub/layers/eprint/defs.json +248 -0
- lairs/lexicons/pub/layers/eprint/eprint.json +119 -0
- lairs/lexicons/pub/layers/eprint/getDataLink.json +30 -0
- lairs/lexicons/pub/layers/eprint/getEprint.json +30 -0
- lairs/lexicons/pub/layers/eprint/listDataLinks.json +43 -0
- lairs/lexicons/pub/layers/eprint/listEprints.json +44 -0
- lairs/lexicons/pub/layers/expression/expression.json +144 -0
- lairs/lexicons/pub/layers/expression/getExpression.json +30 -0
- lairs/lexicons/pub/layers/expression/listExpressions.json +90 -0
- lairs/lexicons/pub/layers/graph/defs.json +47 -0
- lairs/lexicons/pub/layers/graph/getGraphEdge.json +30 -0
- lairs/lexicons/pub/layers/graph/getGraphEdgeSet.json +30 -0
- lairs/lexicons/pub/layers/graph/getGraphNode.json +30 -0
- lairs/lexicons/pub/layers/graph/graphEdge.json +89 -0
- lairs/lexicons/pub/layers/graph/graphEdgeSet.json +98 -0
- lairs/lexicons/pub/layers/graph/graphNode.json +55 -0
- lairs/lexicons/pub/layers/graph/listGraphEdgeSets.json +44 -0
- lairs/lexicons/pub/layers/graph/listGraphEdges.json +45 -0
- lairs/lexicons/pub/layers/graph/listGraphNodes.json +43 -0
- lairs/lexicons/pub/layers/integration/README.md +212 -0
- lairs/lexicons/pub/layers/integration/applyLens.json +61 -0
- lairs/lexicons/pub/layers/integration/getExternal.json +45 -0
- lairs/lexicons/pub/layers/integration/listExternal.json +53 -0
- lairs/lexicons/pub/layers/judgment/agreementReport.json +62 -0
- lairs/lexicons/pub/layers/judgment/defs.json +240 -0
- lairs/lexicons/pub/layers/judgment/experimentDef.json +182 -0
- lairs/lexicons/pub/layers/judgment/getAgreementReport.json +30 -0
- lairs/lexicons/pub/layers/judgment/getExperimentDef.json +30 -0
- lairs/lexicons/pub/layers/judgment/getJudgmentSet.json +30 -0
- lairs/lexicons/pub/layers/judgment/judgmentSet.json +55 -0
- lairs/lexicons/pub/layers/judgment/listAgreementReports.json +43 -0
- lairs/lexicons/pub/layers/judgment/listExperimentDefs.json +44 -0
- lairs/lexicons/pub/layers/judgment/listJudgmentSets.json +42 -0
- lairs/lexicons/pub/layers/media/defs.json +149 -0
- lairs/lexicons/pub/layers/media/getMedia.json +30 -0
- lairs/lexicons/pub/layers/media/listMedia.json +43 -0
- lairs/lexicons/pub/layers/media/media.json +147 -0
- lairs/lexicons/pub/layers/ontology/defs.json +67 -0
- lairs/lexicons/pub/layers/ontology/getOntology.json +30 -0
- lairs/lexicons/pub/layers/ontology/getTypeDef.json +30 -0
- lairs/lexicons/pub/layers/ontology/listOntologies.json +43 -0
- lairs/lexicons/pub/layers/ontology/listTypeDefs.json +43 -0
- lairs/lexicons/pub/layers/ontology/ontology.json +80 -0
- lairs/lexicons/pub/layers/ontology/typeDef.json +83 -0
- lairs/lexicons/pub/layers/persona/getPersona.json +30 -0
- lairs/lexicons/pub/layers/persona/listPersonas.json +44 -0
- lairs/lexicons/pub/layers/persona/persona.json +104 -0
- lairs/lexicons/pub/layers/resource/collection.json +102 -0
- lairs/lexicons/pub/layers/resource/collectionMembership.json +45 -0
- lairs/lexicons/pub/layers/resource/defs.json +164 -0
- lairs/lexicons/pub/layers/resource/entry.json +100 -0
- lairs/lexicons/pub/layers/resource/filling.json +75 -0
- lairs/lexicons/pub/layers/resource/getCollection.json +30 -0
- lairs/lexicons/pub/layers/resource/getCollectionMembership.json +30 -0
- lairs/lexicons/pub/layers/resource/getEntry.json +30 -0
- lairs/lexicons/pub/layers/resource/getFilling.json +30 -0
- lairs/lexicons/pub/layers/resource/getTemplate.json +30 -0
- lairs/lexicons/pub/layers/resource/getTemplateComposition.json +30 -0
- lairs/lexicons/pub/layers/resource/listCollectionMemberships.json +42 -0
- lairs/lexicons/pub/layers/resource/listCollections.json +85 -0
- lairs/lexicons/pub/layers/resource/listEntries.json +82 -0
- lairs/lexicons/pub/layers/resource/listFillings.json +43 -0
- lairs/lexicons/pub/layers/resource/listTemplateCompositions.json +43 -0
- lairs/lexicons/pub/layers/resource/listTemplates.json +82 -0
- lairs/lexicons/pub/layers/resource/template.json +90 -0
- lairs/lexicons/pub/layers/resource/templateComposition.json +54 -0
- lairs/lexicons/pub/layers/segmentation/defs.json +71 -0
- lairs/lexicons/pub/layers/segmentation/getSegmentation.json +30 -0
- lairs/lexicons/pub/layers/segmentation/listSegmentations.json +42 -0
- lairs/lexicons/pub/layers/segmentation/segmentation.json +72 -0
- lairs/media/__init__.py +29 -0
- lairs/media/anchors.py +381 -0
- lairs/media/audio.py +214 -0
- lairs/media/neural.py +295 -0
- lairs/media/resolve.py +312 -0
- lairs/media/video.py +289 -0
- lairs/py.typed +0 -0
- lairs/records/__init__.py +55 -0
- lairs/records/_generated/.gitkeep +0 -0
- lairs/records/_generated/__init__.py +42 -0
- lairs/records/_generated/alignment.py +126 -0
- lairs/records/_generated/annotation.py +424 -0
- lairs/records/_generated/changelog.py +144 -0
- lairs/records/_generated/corpus.py +319 -0
- lairs/records/_generated/defs.py +1075 -0
- lairs/records/_generated/eprint.py +445 -0
- lairs/records/_generated/expression.py +127 -0
- lairs/records/_generated/graph.py +333 -0
- lairs/records/_generated/judgment.py +506 -0
- lairs/records/_generated/media.py +302 -0
- lairs/records/_generated/ontology.py +197 -0
- lairs/records/_generated/persona.py +122 -0
- lairs/records/_generated/resource.py +474 -0
- lairs/records/_generated/segmentation.py +129 -0
- lairs/records/blobref.py +80 -0
- lairs/records/views.py +123 -0
- lairs/store/__init__.py +32 -0
- lairs/store/arrow.py +596 -0
- lairs/store/blobcache.py +228 -0
- lairs/store/pool.py +255 -0
- lairs/store/repository.py +595 -0
- lairs/tui/__init__.py +93 -0
- lairs/tui/app.py +193 -0
- lairs/tui/browse.py +234 -0
- lairs/tui/query.py +555 -0
- lairs/tui/registry.py +75 -0
- lairs/tui/screens/__init__.py +9 -0
- lairs/tui/screens/browse.py +200 -0
- lairs/tui/screens/explore.py +204 -0
- lairs/tui/screens/query.py +238 -0
- lairs/tui/styles.tcss +219 -0
- lairs/tui/views.py +964 -0
- lairs/tui/viz.py +967 -0
- lairs-0.1.0.dist-info/METADATA +216 -0
- lairs-0.1.0.dist-info/RECORD +202 -0
- lairs-0.1.0.dist-info/WHEEL +4 -0
- lairs-0.1.0.dist-info/entry_points.txt +17 -0
- lairs-0.1.0.dist-info/licenses/LICENSE +21 -0
lairs/__init__.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""lairs: a read/write dataset client for the Layers format.
|
|
2
|
+
|
|
3
|
+
lairs reads and writes ``pub.layers.*`` records over ATProto, validates them
|
|
4
|
+
against models generated from the Layers lexicons, and exposes them through a
|
|
5
|
+
``datasets``-like API with first-class tooling for audio, video, and neural
|
|
6
|
+
modalities.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from importlib.metadata import PackageNotFoundError
|
|
12
|
+
from importlib.metadata import version as _distribution_version
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
from lairs.atproto.auth import Session, authed_client, login
|
|
16
|
+
from lairs.data import Corpus, load_corpus
|
|
17
|
+
from lairs.discovery import (
|
|
18
|
+
DatasetFilter,
|
|
19
|
+
DatasetSummary,
|
|
20
|
+
RepoTableOfContents,
|
|
21
|
+
discover_datasets,
|
|
22
|
+
list_datasets,
|
|
23
|
+
table_of_contents,
|
|
24
|
+
)
|
|
25
|
+
from lairs.integrations.registry import (
|
|
26
|
+
get_codec,
|
|
27
|
+
get_exporter,
|
|
28
|
+
get_knowledge_base,
|
|
29
|
+
)
|
|
30
|
+
from lairs.records.blobref import BlobRef
|
|
31
|
+
|
|
32
|
+
if TYPE_CHECKING:
|
|
33
|
+
from lairs.integrations.ports import Codec, Exporter, KnowledgeBase
|
|
34
|
+
|
|
35
|
+
__all__ = [
|
|
36
|
+
"BlobRef",
|
|
37
|
+
"Corpus",
|
|
38
|
+
"DatasetFilter",
|
|
39
|
+
"DatasetSummary",
|
|
40
|
+
"RepoTableOfContents",
|
|
41
|
+
"Session",
|
|
42
|
+
"__version__",
|
|
43
|
+
"authed_client",
|
|
44
|
+
"codec",
|
|
45
|
+
"discover_datasets",
|
|
46
|
+
"exporter",
|
|
47
|
+
"knowledge_base",
|
|
48
|
+
"list_datasets",
|
|
49
|
+
"load_corpus",
|
|
50
|
+
"login",
|
|
51
|
+
"table_of_contents",
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
_FALLBACK_VERSION = "0.1.0"
"""Version string reported when no distribution metadata can be found.

Source and editable trees where ``importlib.metadata`` has no installed
``lairs`` distribution report this literal instead. Keep it identical to the
``version`` field in ``pyproject.toml``.
"""


def _resolve_version() -> str:
    """Resolve the package version, preferring installed metadata.

    Returns
    -------
    str
        The version recorded in the installed ``lairs`` distribution
        metadata. When that lookup raises ``PackageNotFoundError`` (for
        example in a source checkout) the ``_FALLBACK_VERSION`` literal is
        returned instead.
    """
    try:
        installed = _distribution_version("lairs")
    except PackageNotFoundError:
        # no installed distribution to ask, so report the pinned literal.
        return _FALLBACK_VERSION
    return installed
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
__version__ = _resolve_version()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def codec(name: str) -> type[Codec]:
    """Resolve a codec adapter class from its registered name.

    Thin convenience wrapper that forwards to
    ``lairs.integrations.registry.get_codec``.

    Parameters
    ----------
    name : str
        The name the codec is registered under (for example ``"conllu"`` or
        ``"brat"``).

    Returns
    -------
    type
        The codec class registered under ``name``.

    Raises
    ------
    lairs.integrations.registry.UnknownAdapterError
        If no codec is registered under ``name``.
    """
    adapter_class = get_codec(name)
    return adapter_class
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def exporter(name: str) -> type[Exporter]:
    """Resolve an exporter adapter class from its registered name.

    Thin convenience wrapper that forwards to
    ``lairs.integrations.registry.get_exporter``.

    Parameters
    ----------
    name : str
        The name the exporter is registered under (for example ``"hf"`` or
        ``"torch"``).

    Returns
    -------
    type
        The exporter class registered under ``name``.

    Raises
    ------
    lairs.integrations.registry.UnknownAdapterError
        If no exporter is registered under ``name``.
    """
    adapter_class = get_exporter(name)
    return adapter_class
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def knowledge_base(name: str) -> type[KnowledgeBase]:
    """Resolve a knowledge-base adapter class from its registered name.

    Thin convenience wrapper that forwards to
    ``lairs.integrations.registry.get_knowledge_base``.

    Parameters
    ----------
    name : str
        The name the knowledge base is registered under (for example
        ``"wikidata"``).

    Returns
    -------
    type
        The knowledge-base class registered under ``name``.

    Raises
    ------
    lairs.integrations.registry.UnknownAdapterError
        If no knowledge base is registered under ``name``.
    """
    adapter_class = get_knowledge_base(name)
    return adapter_class
|
lairs/_aturi.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""AT-URI parsing helpers shared across lairs.
|
|
2
|
+
|
|
3
|
+
Small, dependency-free helpers for pulling the authority and collection segments
|
|
4
|
+
out of an ``at://`` URI. Centralised here so the discovery, CLI, and data layers
|
|
5
|
+
parse AT-URIs the same way.
|
|
6
|
+
|
|
7
|
+
These helpers are positional string splitters, not validators. They assume a
|
|
8
|
+
well-formed ``at://authority/collection/rkey`` URI and return an empty string
|
|
9
|
+
for a missing segment; they do not check the ``at://`` scheme or the authority
|
|
10
|
+
shape, so malformed input yields a best-effort segment rather than an error.
|
|
11
|
+
Callers that need validation must do it before calling.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
__all__ = ["authority_of", "nsid_of"]
|
|
17
|
+
|
|
18
|
+
_AT_URI_PREFIX = "at://"
"""The scheme prefix that starts every AT-URI."""

_MIN_PARTS_WITH_COLLECTION = 2
"""How many ``/``-separated segments a URI needs before it names a collection."""


def authority_of(uri: str) -> str:
    """Extract the authority (DID or handle) segment of an AT-URI.

    The ``at://`` prefix is dropped when present and everything up to the
    first ``/`` is returned. No validation is performed.

    Parameters
    ----------
    uri : str
        The AT-URI to split.

    Returns
    -------
    str
        The leading segment, or an empty string when nothing remains after
        the prefix is removed.
    """
    remainder = uri.removeprefix(_AT_URI_PREFIX)
    if not remainder:
        return ""
    authority, _, _ = remainder.partition("/")
    return authority


def nsid_of(uri: str) -> str:
    """Extract the collection NSID segment of an AT-URI.

    The ``at://`` prefix is dropped when present and the second
    ``/``-separated segment is returned. No validation is performed.

    Parameters
    ----------
    uri : str
        The AT-URI to split.

    Returns
    -------
    str
        The second segment, or an empty string when the URI has fewer than
        two segments.
    """
    segments = uri.removeprefix(_AT_URI_PREFIX).split("/")
    if len(segments) < _MIN_PARTS_WITH_COLLECTION:
        return ""
    return segments[1]
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Codegen pipeline that turns vendored lexicons into generated models.
|
|
2
|
+
|
|
3
|
+
The pipeline parses each lexicon into a panproto ``Schema``, walks it into
|
|
4
|
+
didactic spec dicts, builds models, and emits Python module text.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from lairs._codegen.emit import emit_module
|
|
10
|
+
from lairs._codegen.manifest import Manifest, load_manifest
|
|
11
|
+
from lairs._codegen.pipeline import check, generate, namespace_specs
|
|
12
|
+
from lairs._codegen.schema_to_spec import (
|
|
13
|
+
FieldSpec,
|
|
14
|
+
ModelSpec,
|
|
15
|
+
VariantSpec,
|
|
16
|
+
schema_to_specs,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"FieldSpec",
|
|
21
|
+
"Manifest",
|
|
22
|
+
"ModelSpec",
|
|
23
|
+
"VariantSpec",
|
|
24
|
+
"check",
|
|
25
|
+
"emit_module",
|
|
26
|
+
"generate",
|
|
27
|
+
"load_manifest",
|
|
28
|
+
"namespace_specs",
|
|
29
|
+
"schema_to_specs",
|
|
30
|
+
]
|
lairs/_codegen/emit.py
ADDED
|
@@ -0,0 +1,450 @@
|
|
|
1
|
+
"""Emit Python module text for generated models.
|
|
2
|
+
|
|
3
|
+
Renders :class:`~lairs._codegen.schema_to_spec.ModelSpec` value models into
|
|
4
|
+
committed module source text with a generated-by header and the source manifest
|
|
5
|
+
hash. Emission is deterministic (stable class ordering and stable field
|
|
6
|
+
ordering) so the ``lairs gen --check`` drift gate is meaningful. The emitted
|
|
7
|
+
modules are the import surface of :mod:`lairs.records`; they are rich didactic
|
|
8
|
+
models carrying descriptions, optionality, refined value types, integer ranges,
|
|
9
|
+
knownValues, and union discriminators, which the lossy spec-synthesis path could
|
|
10
|
+
not reconstruct.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Sequence
|
|
19
|
+
|
|
20
|
+
from lairs._codegen.schema_to_spec import FieldSpec, ModelSpec, VariantSpec
|
|
21
|
+
|
|
22
|
+
__all__ = ["emit_module"]
|
|
23
|
+
|
|
24
|
+
_HEADER_LINE = "# generated by lairs gen; do not edit"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def emit_module(
    specs: Sequence[ModelSpec],
    *,
    manifest_hash: str,
) -> str:
    """Render one namespace's specs into the text of a Python module.

    Parameters
    ----------
    specs : collections.abc.Sequence of lairs._codegen.schema_to_spec.ModelSpec
        The record, object, and union specs for one namespace. They are
        passed through ``_order_specs`` before anything is rendered.
    manifest_hash : str
        The content hash of the source lexicon tree; it is handed to
        ``_header`` so the emitted module records the revision it came from.

    Returns
    -------
    str
        The module source: header, module docstring, imports, ``__all__``,
        and one class block per spec, joined by blank lines, with over-length
        lines suppressed and exactly one trailing newline.
    """
    ordered = _order_specs(specs)
    needs_datetime = any(_spec_uses_datetime(spec) for spec in ordered)
    needs_literal = any(spec.is_union for spec in ordered)
    needs_blobref = any(_spec_uses_blobref(spec) for spec in ordered)
    has_mixed_case = any(_spec_uses_mixed_case(spec) for spec in ordered)

    # lint codes inherent to atproto-faithful generated didactic models.
    # N815: the python attribute keeps the camelCase wire key. TC001/TC003:
    # didactic resolves annotations eagerly, so the annotated imports must stay
    # at runtime rather than move under TYPE_CHECKING. Only the codes this
    # module actually triggers are listed.
    triggers = (
        ("N815", has_mixed_case),
        ("TC003", needs_datetime),
        ("TC001", needs_blobref),
    )
    codes: set[str] = {code for code, active in triggers if active}

    sections: list[str] = [
        _header(manifest_hash, codes),
        _module_docstring(ordered),
        _imports(
            uses_datetime=needs_datetime,
            uses_literal=needs_literal,
            uses_blobref=needs_blobref,
        ),
        _dunder_all(ordered),
        *(_class_text(spec) for spec in ordered),
    ]
    source = "\n\n".join(sections).rstrip()
    return _suppress_long_lines(source) + "\n"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
_LINE_LIMIT = 88


def _suppress_long_lines(text: str) -> str:
    """Append an ``E501`` suppression to every over-length line of ``text``.

    Lexicon descriptions are copied verbatim and can exceed the line limit;
    ``ruff format`` does not wrap string-literal arguments, so an over-length
    description keeps a targeted suppression rather than being truncated.

    Parameters
    ----------
    text : str
        The module source, with lines separated by ``"\\n"``.

    Returns
    -------
    str
        The same source with each line passed through ``_suppress_line``.
    """
    return "\n".join(_suppress_line(line) for line in text.split("\n"))


def _suppress_line(line: str) -> str:
    """Return ``line`` with an ``E501`` suppression when it is over-length.

    Parameters
    ----------
    line : str
        A single source line without its trailing newline.

    Returns
    -------
    str
        ``line`` unchanged when it is at most ``_LINE_LIMIT`` characters long
        or already contains a ``# noqa`` marker; otherwise ``line`` followed
        by an inline ``# noqa: E501`` comment.
    """
    if len(line) > _LINE_LIMIT and "# noqa" not in line:
        # PEP 8 (pycodestyle E261) wants at least two spaces before an inline
        # comment, so the generated suppression must not introduce that lint.
        return f"{line}  # noqa: E501"
    return line
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _order_specs(specs: Sequence[ModelSpec]) -> list[ModelSpec]:
    """Order specs deterministically with dependencies first.

    Specs are visited in class-name order. Before a spec is appended, every
    same-namespace spec reported by ``_local_dependencies`` is appended first,
    so the emitted references resolve without quoting. A spec that is reached
    again while it is still being expanded (a dependency cycle) is skipped at
    that point rather than recursed into.

    Parameters
    ----------
    specs : collections.abc.Sequence of lairs._codegen.schema_to_spec.ModelSpec
        The specs of one namespace, in any order.

    Returns
    -------
    list of lairs._codegen.schema_to_spec.ModelSpec
        Each distinct spec name exactly once, dependencies ahead of referrers.
    """
    lookup = {spec.name: spec for spec in specs}
    result: list[ModelSpec] = []
    emitted: set[str] = set()

    def place(current: ModelSpec, in_progress: frozenset[str]) -> None:
        name = current.name
        # already emitted, or a cycle back into a spec still being expanded.
        if name in emitted or name in in_progress:
            return
        expanding = in_progress | {name}
        for dependency in _local_dependencies(current, lookup):
            place(lookup[dependency], expanding)
        emitted.add(name)
        result.append(current)

    for candidate in sorted(specs, key=lambda item: item.name):
        place(candidate, frozenset())
    return result
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _local_dependencies(
    spec: ModelSpec,
    by_name: dict[str, ModelSpec],
) -> list[str]:
    """Collect the names of same-namespace specs that ``spec`` refers to.

    Parameters
    ----------
    spec : lairs._codegen.schema_to_spec.ModelSpec
        The spec whose variants and fields are inspected.
    by_name : dict of str to lairs._codegen.schema_to_spec.ModelSpec
        The specs of the current namespace keyed by class name; targets that
        are not keys of this mapping are ignored.

    Returns
    -------
    list of str
        The distinct local target names, sorted.
    """
    found: set[str] = set()
    for variant in spec.variants:
        if variant.target in by_name:
            found.add(variant.target)
    for field in spec.fields:
        target = _field_local_target(field)
        if target is not None and target in by_name:
            found.add(target)
    return sorted(found)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _field_local_target(field: FieldSpec) -> str | None:
    """Return the model name a field embeds or unions, if any.

    Array fields are unwrapped item by item until a non-array kind is reached.

    Parameters
    ----------
    field : lairs._codegen.schema_to_spec.FieldSpec
        The field to inspect.

    Returns
    -------
    str or None
        The ``target`` of the innermost ``embed`` or ``union`` field, or
        ``None`` for any other kind and for an array without an item spec.
    """
    current: FieldSpec | None = field
    while current is not None:
        if current.type_kind in {"embed", "union"}:
            return current.target
        if current.type_kind != "array":
            return None
        # descend into the array's element spec (may be absent).
        current = current.item
    return None
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _header(manifest_hash: str, noqa_codes: set[str]) -> str:
    """Build the generated-by header that records the lexicon tree hash.

    The hash follows plain words with no colon, which keeps the comment from
    tripping ruff's commented-out-code heuristic. When ``noqa_codes`` is not
    empty a file-level ``ruff: noqa`` line listing exactly those codes is
    added, so the suppression itself is never flagged unused.

    Parameters
    ----------
    manifest_hash : str
        The content hash of the source lexicon tree.
    noqa_codes : set of str
        The lint codes the emitted module triggers.

    Returns
    -------
    str
        Two or three comment lines joined by newlines.
    """
    header_lines = [_HEADER_LINE, f"# lexicon tree hash {manifest_hash}"]
    suppression = _noqa_directive(noqa_codes)
    if suppression is None:
        return "\n".join(header_lines)
    return "\n".join([*header_lines, suppression])
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _noqa_directive(noqa_codes: set[str]) -> str | None:
    """Format a file-level ruff suppression line for the given codes.

    Parameters
    ----------
    noqa_codes : set of str
        The lint codes to suppress.

    Returns
    -------
    str or None
        A ``# ruff: noqa:`` comment listing the codes in sorted order, or
        ``None`` when ``noqa_codes`` is empty.
    """
    if noqa_codes:
        return "# ruff: noqa: " + ", ".join(sorted(noqa_codes))
    return None
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _module_docstring(specs: Sequence[ModelSpec]) -> str:
    """Build the emitted module's docstring source.

    Parameters
    ----------
    specs : collections.abc.Sequence of lairs._codegen.schema_to_spec.ModelSpec
        The specs of the module; ``_shared_namespace`` derives the namespace
        named in the first line from them.

    Returns
    -------
    str
        A complete triple-quoted docstring literal, delimiters included.
    """
    namespace = _shared_namespace(specs)
    doc_lines = [
        f'"""Generated models for the {namespace} lexicon namespace.',
        "",
        "This module is emitted by ``lairs gen`` from the vendored lexicons and",
        "must not be edited by hand. Each class mirrors a lexicon record, object,",
        "or union definition.",
        '"""',
    ]
    return "\n".join(doc_lines)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _shared_namespace(specs: Sequence[ModelSpec]) -> str:
    """Derive the namespace prefix named in a module docstring.

    The prefix is the first three dotted components (fewer when the nsid is
    shorter) of the lexicographically smallest nsid among ``specs``, for
    example ``pub.layers.annotation``.

    Parameters
    ----------
    specs : collections.abc.Sequence of lairs._codegen.schema_to_spec.ModelSpec
        The specs of one emitted module.

    Returns
    -------
    str
        The dotted prefix, or ``"pub.layers"`` when ``specs`` is empty.
    """
    distinct = {spec.nsid for spec in specs}
    if not distinct:
        return "pub.layers"
    smallest = min(distinct)
    return ".".join(smallest.split(".")[:3])
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _imports(*, uses_datetime: bool, uses_literal: bool, uses_blobref: bool) -> str:
    """Build the import block of the emitted module.

    The generated modules deliberately omit ``from __future__ import
    annotations``: didactic resolves field annotations eagerly at class
    creation, so the annotated imports must be live runtime names, and keeping
    them eager also lets ruff see them as used rather than typing-only.

    Parameters
    ----------
    uses_datetime : bool
        Whether to import ``datetime``.
    uses_literal : bool
        Whether to import ``typing.Literal``.
    uses_blobref : bool
        Whether to import ``BlobRef``.

    Returns
    -------
    str
        Up to three import groups (standard library, didactic, lairs)
        separated by blank lines; the didactic import is always present.
    """
    optional_stdlib = (
        (uses_datetime, "from datetime import datetime"),
        (uses_literal, "from typing import Literal"),
    )
    stdlib_group = [statement for wanted, statement in optional_stdlib if wanted]

    groups: list[str] = []
    if stdlib_group:
        groups.append("\n".join(stdlib_group))
    groups.append("import didactic.api as dx")
    if uses_blobref:
        groups.append("from lairs.records.blobref import BlobRef")
    return "\n\n".join(groups)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _dunder_all(specs: Sequence[ModelSpec]) -> str:
    """Build the ``__all__`` assignment of the emitted module.

    Parameters
    ----------
    specs : collections.abc.Sequence of lairs._codegen.schema_to_spec.ModelSpec
        The specs whose emitted class names are listed.

    Returns
    -------
    str
        A multi-line list literal with one sorted, quoted name per line, or an
        annotated empty list when no names are emitted.
    """
    exported = sorted(_emitted_names(specs))
    if not exported:
        return "__all__: list[str] = []"
    entries = ",\n ".join(f'"{name}"' for name in exported)
    return f"__all__ = [\n {entries},\n]"
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _emitted_names(specs: Sequence[ModelSpec]) -> list[str]:
    """List every class name the specs emit, union variants included.

    Parameters
    ----------
    specs : collections.abc.Sequence of lairs._codegen.schema_to_spec.ModelSpec
        The specs to enumerate.

    Returns
    -------
    list of str
        For each spec in input order: its own name followed by the class name
        of each of its variants.
    """
    return [
        class_name
        for spec in specs
        for class_name in (
            spec.name,
            *(variant.class_name for variant in spec.variants),
        )
    ]
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _class_text(spec: ModelSpec) -> str:
    """Render one spec, dispatching on whether it is a union.

    Parameters
    ----------
    spec : lairs._codegen.schema_to_spec.ModelSpec
        The spec to render.

    Returns
    -------
    str
        The output of ``_union_text`` when ``spec.is_union`` is truthy,
        otherwise the output of ``_model_text``.
    """
    renderer = _union_text if spec.is_union else _model_text
    return renderer(spec)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _model_text(spec: ModelSpec) -> str:
    """Render a record or object spec as a ``dx.Model`` subclass.

    Parameters
    ----------
    spec : lairs._codegen.schema_to_spec.ModelSpec
        The spec to render. A falsy ``description`` is replaced by a generic
        sentence built from ``def_name``.

    Returns
    -------
    str
        The class header, its docstring, a blank line, and the lines of every
        field declaration, joined by newlines.
    """
    summary = spec.description or f"The {spec.def_name} definition."
    head = [f"class {spec.name}(dx.Model):", _class_docstring(summary), ""]
    body = [line for field in spec.fields for line in _field_lines(field)]
    return "\n".join(head + body)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _union_text(spec: ModelSpec) -> str:
    """Render a union spec as a ``dx.TaggedUnion`` root plus its variants.

    Parameters
    ----------
    spec : lairs._codegen.schema_to_spec.ModelSpec
        The union spec. A falsy ``discriminator`` falls back to ``"kind"`` and
        a falsy ``description`` to a generic sentence built from ``def_name``.

    Returns
    -------
    str
        The root class followed by one subclass per variant, with two blank
        lines between consecutive classes.
    """
    discriminator = spec.discriminator or "kind"
    summary = spec.description or f"The {spec.def_name} union."
    root_lines = [
        f'class {spec.name}(dx.TaggedUnion, discriminator="{discriminator}"):',
        _class_docstring(summary),
        "",
    ]
    family = ["\n".join(root_lines)]
    for variant in spec.variants:
        family.append(_variant_text(spec, variant, discriminator))
    return "\n\n\n".join(family)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _variant_text(
    spec: ModelSpec,
    variant: VariantSpec,
    discriminator: str,
) -> str:
    """Render one union variant as a subclass of the union root.

    Parameters
    ----------
    spec : lairs._codegen.schema_to_spec.ModelSpec
        The union the variant belongs to; its name is the base class.
    variant : lairs._codegen.schema_to_spec.VariantSpec
        The variant supplying the class name, discriminator value, and target.
    discriminator : str
        The name of the discriminator field.

    Returns
    -------
    str
        A class with a docstring, a ``Literal`` discriminator field defaulting
        to the variant's value, and an optional ``value`` field typed as the
        variant's target.
    """
    tag = variant.discriminator_value
    # NOTE(review): ``tag`` is interpolated into the description literal
    # without escaping -- presumably it never contains a double quote.
    rendered = [
        f"class {variant.class_name}({spec.name}):",
        _class_docstring(f"The {tag!r} member of {spec.name}."),
        "",
        f" {discriminator}: Literal[{tag!r}] = dx.field(",
        f" default={tag!r},",
        f' description="discriminator pinning this member to {tag}",',
        " )",
        f" value: {variant.target} | None = dx.field(",
        " default=None,",
        ' description="the wrapped member model",',
        " )",
    ]
    return "\n".join(rendered)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _field_lines(field: FieldSpec) -> list[str]:
    """Render one field declaration as source lines.

    Lexicon property names are camelCase wire keys that must round-trip
    verbatim through ATProto JSON, so the python attribute keeps the name
    as-is; the file-level ``N815`` suppression in the header covers them.

    Parameters
    ----------
    field : lairs._codegen.schema_to_spec.FieldSpec
        The field to render.

    Returns
    -------
    list of str
        The opening ``dx.field(`` line, one line per keyword argument from
        ``_field_args``, and the closing parenthesis line.
    """
    annotation = _field_annotation(field)
    opening = f" {field.name}: {annotation} = dx.field("
    return [opening, *(f" {arg}" for arg in _field_args(field)), " )"]
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _is_mixed_case(name: str) -> bool:
|
|
319
|
+
"""Return whether a field name is mixedCase (carries an inner capital).
|
|
320
|
+
|
|
321
|
+
A purely lowercase or snake_case name does not trip ``N815``; only names
|
|
322
|
+
with an interior upper-case letter do.
|
|
323
|
+
"""
|
|
324
|
+
return name != name.lower() and "_" not in name
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _field_annotation(field: FieldSpec) -> str:
    """Produce the annotation source for a field.

    Required fields use the base annotation as is; every other field gets
    `` | None`` appended to it.
    """
    source = _base_annotation(field)
    return source if field.required else f"{source} | None"
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
# scalar field kinds whose python annotation is a fixed builtin or alias.
|
|
336
|
+
_SCALAR_ANNOTATIONS: dict[str, str] = {
|
|
337
|
+
"str": "str",
|
|
338
|
+
"int": "int",
|
|
339
|
+
"bool": "bool",
|
|
340
|
+
"datetime": "datetime",
|
|
341
|
+
"bytes": "bytes",
|
|
342
|
+
"blob": "BlobRef",
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def _base_annotation(field: FieldSpec) -> str:
|
|
347
|
+
"""Return the unwrapped python annotation source for a field."""
|
|
348
|
+
kind = field.type_kind
|
|
349
|
+
if kind == "embed":
|
|
350
|
+
return f"dx.Embed[{field.target}]"
|
|
351
|
+
if kind == "union":
|
|
352
|
+
return f"{field.target}"
|
|
353
|
+
if kind == "array":
|
|
354
|
+
item = field.item
|
|
355
|
+
element = _base_annotation(item) if item is not None else "str"
|
|
356
|
+
return f"tuple[{element}, ...]"
|
|
357
|
+
return _SCALAR_ANNOTATIONS.get(kind, "str")
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _field_args(field: FieldSpec) -> list[str]:
    """Collect the ``dx.field`` keyword argument source lines for a field.

    Arguments are produced in a fixed order: the default (only for
    non-required fields), the description, the ``extras`` mapping, and
    ``opaque=True`` for ``bytes`` fields. Each entry already carries its
    trailing comma.

    Returns
    -------
    list[str]
        At least one argument line; never empty.
    """
    kwargs: list[str] = []
    if not field.required:
        # optional arrays default to an empty tuple, everything else to None
        default_kwarg = (
            "default_factory=tuple," if field.type_kind == "array" else "default=None,"
        )
        kwargs.append(default_kwarg)
    doc = field.description
    if doc is not None:
        kwargs.append(f"description={_py_str(doc)},")
    extras_source = _field_extras(field)
    if extras_source:
        kwargs.append("extras={" + extras_source + "},")
    if field.type_kind == "bytes":
        kwargs.append("opaque=True,")
    # required scalar fields with no description produce nothing above; a
    # placeholder description keeps the emitted call well-formed
    return kwargs or ['description="generated field",']
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def _field_extras(field: FieldSpec) -> str:
|
|
383
|
+
"""Return the ``extras`` mapping literal contents for a field, if any."""
|
|
384
|
+
entries: list[str] = []
|
|
385
|
+
if field.string_format is not None:
|
|
386
|
+
entries.append(f'"format": {_py_str(field.string_format)}')
|
|
387
|
+
if field.known_values:
|
|
388
|
+
values = ", ".join(_py_str(value) for value in field.known_values)
|
|
389
|
+
entries.append(f'"knownValues": ({values},)')
|
|
390
|
+
if field.minimum is not None:
|
|
391
|
+
entries.append(f'"minimum": {field.minimum}')
|
|
392
|
+
if field.maximum is not None:
|
|
393
|
+
entries.append(f'"maximum": {field.maximum}')
|
|
394
|
+
if field.min_length is not None:
|
|
395
|
+
entries.append(f'"minLength": {field.min_length}')
|
|
396
|
+
if field.max_length is not None:
|
|
397
|
+
entries.append(f'"maxLength": {field.max_length}')
|
|
398
|
+
return ", ".join(entries)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _class_docstring(summary: str) -> str:
    """Format a summary as an indented one-line numpy-style class docstring.

    Whitespace runs in ``summary`` are collapsed to single spaces, a closing
    period is added when missing, and the text is escaped before being
    wrapped in triple quotes.
    """
    collapsed = " ".join(summary.split())
    sentence = collapsed if collapsed.endswith(".") else collapsed + "."
    return ' """' + _escape_docstring(sentence) + '"""'
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def _escape_docstring(text: str) -> str:
|
|
410
|
+
"""Escape a docstring body so it is a valid triple-quoted literal."""
|
|
411
|
+
return text.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def _py_str(value: str) -> str:
|
|
415
|
+
"""Return a python source string literal for ``value``."""
|
|
416
|
+
escaped = value.replace("\\", "\\\\").replace('"', '\\"')
|
|
417
|
+
return f'"{escaped}"'
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _spec_uses_datetime(spec: ModelSpec) -> bool:
    """Report whether at least one field in ``spec.fields`` is datetime-typed.

    Array fields count through their element type.
    """
    return any(map(_field_uses_datetime, spec.fields))
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
def _field_uses_datetime(field: FieldSpec) -> bool:
|
|
426
|
+
"""Return whether a field (or its array element) is datetime-typed."""
|
|
427
|
+
if field.type_kind == "datetime":
|
|
428
|
+
return True
|
|
429
|
+
if field.type_kind == "array" and field.item is not None:
|
|
430
|
+
return _field_uses_datetime(field.item)
|
|
431
|
+
return False
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def _spec_uses_mixed_case(spec: ModelSpec) -> bool:
    """Report whether at least one field name in ``spec.fields`` is mixedCase."""
    return any(map(_is_mixed_case, (member.name for member in spec.fields)))
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def _spec_uses_blobref(spec: ModelSpec) -> bool:
    """Report whether at least one field in ``spec.fields`` is a blob reference.

    Array fields count through their element type.
    """
    return any(map(_field_uses_blobref, spec.fields))
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _field_uses_blobref(field: FieldSpec) -> bool:
|
|
445
|
+
"""Return whether a field (or its array element) is a blob reference."""
|
|
446
|
+
if field.type_kind == "blob":
|
|
447
|
+
return True
|
|
448
|
+
if field.type_kind == "array" and field.item is not None:
|
|
449
|
+
return _field_uses_blobref(field.item)
|
|
450
|
+
return False
|