lairs 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. lairs/__init__.py +142 -0
  2. lairs/_aturi.py +59 -0
  3. lairs/_codegen/__init__.py +30 -0
  4. lairs/_codegen/emit.py +450 -0
  5. lairs/_codegen/manifest.py +99 -0
  6. lairs/_codegen/pipeline.py +366 -0
  7. lairs/_codegen/schema_to_spec.py +627 -0
  8. lairs/_types.py +38 -0
  9. lairs/atproto/__init__.py +84 -0
  10. lairs/atproto/_car.py +107 -0
  11. lairs/atproto/appview.py +238 -0
  12. lairs/atproto/auth.py +383 -0
  13. lairs/atproto/blobs.py +250 -0
  14. lairs/atproto/firehose.py +374 -0
  15. lairs/atproto/identity.py +419 -0
  16. lairs/atproto/pds.py +873 -0
  17. lairs/author/__init__.py +60 -0
  18. lairs/author/builders.py +595 -0
  19. lairs/author/publish.py +1391 -0
  20. lairs/cli.py +1403 -0
  21. lairs/data/__init__.py +34 -0
  22. lairs/data/corpus.py +806 -0
  23. lairs/data/dataset.py +438 -0
  24. lairs/data/features.py +252 -0
  25. lairs/discovery/__init__.py +46 -0
  26. lairs/discovery/accelerator.py +137 -0
  27. lairs/discovery/actor.py +299 -0
  28. lairs/discovery/cards.py +335 -0
  29. lairs/discovery/federated.py +144 -0
  30. lairs/discovery/index.py +341 -0
  31. lairs/discovery/ingest.py +362 -0
  32. lairs/discovery/links.py +169 -0
  33. lairs/discovery/models.py +221 -0
  34. lairs/discovery/query.py +175 -0
  35. lairs/discovery/summary.py +283 -0
  36. lairs/integrations/__init__.py +43 -0
  37. lairs/integrations/codecs/__init__.py +82 -0
  38. lairs/integrations/codecs/brat.py +795 -0
  39. lairs/integrations/codecs/conllu.py +987 -0
  40. lairs/integrations/hf/__init__.py +41 -0
  41. lairs/integrations/hf/datasets.py +527 -0
  42. lairs/integrations/hf/hub.py +480 -0
  43. lairs/integrations/kb/__init__.py +78 -0
  44. lairs/integrations/kb/glazing.py +480 -0
  45. lairs/integrations/kb/reconciliation.py +475 -0
  46. lairs/integrations/kb/wikidata.py +590 -0
  47. lairs/integrations/ports.py +217 -0
  48. lairs/integrations/registry.py +279 -0
  49. lairs/integrations/tfdata.py +428 -0
  50. lairs/integrations/torch.py +497 -0
  51. lairs/integrations/tracking.py +297 -0
  52. lairs/integrations/webdataset.py +548 -0
  53. lairs/lexicons/MANIFEST.toml +18 -0
  54. lairs/lexicons/README.md +17 -0
  55. lairs/lexicons/pub/layers/alignment/alignment.json +130 -0
  56. lairs/lexicons/pub/layers/alignment/getAlignment.json +30 -0
  57. lairs/lexicons/pub/layers/alignment/listAlignments.json +43 -0
  58. lairs/lexicons/pub/layers/annotation/annotationLayer.json +239 -0
  59. lairs/lexicons/pub/layers/annotation/clusterSet.json +86 -0
  60. lairs/lexicons/pub/layers/annotation/defs.json +165 -0
  61. lairs/lexicons/pub/layers/annotation/getAnnotationLayer.json +30 -0
  62. lairs/lexicons/pub/layers/annotation/getClusterSet.json +30 -0
  63. lairs/lexicons/pub/layers/annotation/listAnnotationLayers.json +44 -0
  64. lairs/lexicons/pub/layers/annotation/listClusterSets.json +43 -0
  65. lairs/lexicons/pub/layers/authAnnotator.json +64 -0
  66. lairs/lexicons/pub/layers/authCorpusManager.json +64 -0
  67. lairs/lexicons/pub/layers/authExperimenter.json +52 -0
  68. lairs/lexicons/pub/layers/authFull.json +110 -0
  69. lairs/lexicons/pub/layers/authOntologyEditor.json +46 -0
  70. lairs/lexicons/pub/layers/authReadOnly.json +73 -0
  71. lairs/lexicons/pub/layers/changelog/defs.json +107 -0
  72. lairs/lexicons/pub/layers/changelog/entry.json +56 -0
  73. lairs/lexicons/pub/layers/changelog/getEntry.json +30 -0
  74. lairs/lexicons/pub/layers/changelog/listByCollection.json +45 -0
  75. lairs/lexicons/pub/layers/changelog/listEntries.json +46 -0
  76. lairs/lexicons/pub/layers/corpus/corpus.json +110 -0
  77. lairs/lexicons/pub/layers/corpus/defs.json +173 -0
  78. lairs/lexicons/pub/layers/corpus/getCorpus.json +30 -0
  79. lairs/lexicons/pub/layers/corpus/getMembership.json +30 -0
  80. lairs/lexicons/pub/layers/corpus/listCorpora.json +85 -0
  81. lairs/lexicons/pub/layers/corpus/listMemberships.json +43 -0
  82. lairs/lexicons/pub/layers/corpus/membership.json +55 -0
  83. lairs/lexicons/pub/layers/defs.json +972 -0
  84. lairs/lexicons/pub/layers/eprint/dataLink.json +90 -0
  85. lairs/lexicons/pub/layers/eprint/defs.json +248 -0
  86. lairs/lexicons/pub/layers/eprint/eprint.json +119 -0
  87. lairs/lexicons/pub/layers/eprint/getDataLink.json +30 -0
  88. lairs/lexicons/pub/layers/eprint/getEprint.json +30 -0
  89. lairs/lexicons/pub/layers/eprint/listDataLinks.json +43 -0
  90. lairs/lexicons/pub/layers/eprint/listEprints.json +44 -0
  91. lairs/lexicons/pub/layers/expression/expression.json +144 -0
  92. lairs/lexicons/pub/layers/expression/getExpression.json +30 -0
  93. lairs/lexicons/pub/layers/expression/listExpressions.json +90 -0
  94. lairs/lexicons/pub/layers/graph/defs.json +47 -0
  95. lairs/lexicons/pub/layers/graph/getGraphEdge.json +30 -0
  96. lairs/lexicons/pub/layers/graph/getGraphEdgeSet.json +30 -0
  97. lairs/lexicons/pub/layers/graph/getGraphNode.json +30 -0
  98. lairs/lexicons/pub/layers/graph/graphEdge.json +89 -0
  99. lairs/lexicons/pub/layers/graph/graphEdgeSet.json +98 -0
  100. lairs/lexicons/pub/layers/graph/graphNode.json +55 -0
  101. lairs/lexicons/pub/layers/graph/listGraphEdgeSets.json +44 -0
  102. lairs/lexicons/pub/layers/graph/listGraphEdges.json +45 -0
  103. lairs/lexicons/pub/layers/graph/listGraphNodes.json +43 -0
  104. lairs/lexicons/pub/layers/integration/README.md +212 -0
  105. lairs/lexicons/pub/layers/integration/applyLens.json +61 -0
  106. lairs/lexicons/pub/layers/integration/getExternal.json +45 -0
  107. lairs/lexicons/pub/layers/integration/listExternal.json +53 -0
  108. lairs/lexicons/pub/layers/judgment/agreementReport.json +62 -0
  109. lairs/lexicons/pub/layers/judgment/defs.json +240 -0
  110. lairs/lexicons/pub/layers/judgment/experimentDef.json +182 -0
  111. lairs/lexicons/pub/layers/judgment/getAgreementReport.json +30 -0
  112. lairs/lexicons/pub/layers/judgment/getExperimentDef.json +30 -0
  113. lairs/lexicons/pub/layers/judgment/getJudgmentSet.json +30 -0
  114. lairs/lexicons/pub/layers/judgment/judgmentSet.json +55 -0
  115. lairs/lexicons/pub/layers/judgment/listAgreementReports.json +43 -0
  116. lairs/lexicons/pub/layers/judgment/listExperimentDefs.json +44 -0
  117. lairs/lexicons/pub/layers/judgment/listJudgmentSets.json +42 -0
  118. lairs/lexicons/pub/layers/media/defs.json +149 -0
  119. lairs/lexicons/pub/layers/media/getMedia.json +30 -0
  120. lairs/lexicons/pub/layers/media/listMedia.json +43 -0
  121. lairs/lexicons/pub/layers/media/media.json +147 -0
  122. lairs/lexicons/pub/layers/ontology/defs.json +67 -0
  123. lairs/lexicons/pub/layers/ontology/getOntology.json +30 -0
  124. lairs/lexicons/pub/layers/ontology/getTypeDef.json +30 -0
  125. lairs/lexicons/pub/layers/ontology/listOntologies.json +43 -0
  126. lairs/lexicons/pub/layers/ontology/listTypeDefs.json +43 -0
  127. lairs/lexicons/pub/layers/ontology/ontology.json +80 -0
  128. lairs/lexicons/pub/layers/ontology/typeDef.json +83 -0
  129. lairs/lexicons/pub/layers/persona/getPersona.json +30 -0
  130. lairs/lexicons/pub/layers/persona/listPersonas.json +44 -0
  131. lairs/lexicons/pub/layers/persona/persona.json +104 -0
  132. lairs/lexicons/pub/layers/resource/collection.json +102 -0
  133. lairs/lexicons/pub/layers/resource/collectionMembership.json +45 -0
  134. lairs/lexicons/pub/layers/resource/defs.json +164 -0
  135. lairs/lexicons/pub/layers/resource/entry.json +100 -0
  136. lairs/lexicons/pub/layers/resource/filling.json +75 -0
  137. lairs/lexicons/pub/layers/resource/getCollection.json +30 -0
  138. lairs/lexicons/pub/layers/resource/getCollectionMembership.json +30 -0
  139. lairs/lexicons/pub/layers/resource/getEntry.json +30 -0
  140. lairs/lexicons/pub/layers/resource/getFilling.json +30 -0
  141. lairs/lexicons/pub/layers/resource/getTemplate.json +30 -0
  142. lairs/lexicons/pub/layers/resource/getTemplateComposition.json +30 -0
  143. lairs/lexicons/pub/layers/resource/listCollectionMemberships.json +42 -0
  144. lairs/lexicons/pub/layers/resource/listCollections.json +85 -0
  145. lairs/lexicons/pub/layers/resource/listEntries.json +82 -0
  146. lairs/lexicons/pub/layers/resource/listFillings.json +43 -0
  147. lairs/lexicons/pub/layers/resource/listTemplateCompositions.json +43 -0
  148. lairs/lexicons/pub/layers/resource/listTemplates.json +82 -0
  149. lairs/lexicons/pub/layers/resource/template.json +90 -0
  150. lairs/lexicons/pub/layers/resource/templateComposition.json +54 -0
  151. lairs/lexicons/pub/layers/segmentation/defs.json +71 -0
  152. lairs/lexicons/pub/layers/segmentation/getSegmentation.json +30 -0
  153. lairs/lexicons/pub/layers/segmentation/listSegmentations.json +42 -0
  154. lairs/lexicons/pub/layers/segmentation/segmentation.json +72 -0
  155. lairs/media/__init__.py +29 -0
  156. lairs/media/anchors.py +381 -0
  157. lairs/media/audio.py +214 -0
  158. lairs/media/neural.py +295 -0
  159. lairs/media/resolve.py +312 -0
  160. lairs/media/video.py +289 -0
  161. lairs/py.typed +0 -0
  162. lairs/records/__init__.py +55 -0
  163. lairs/records/_generated/.gitkeep +0 -0
  164. lairs/records/_generated/__init__.py +42 -0
  165. lairs/records/_generated/alignment.py +126 -0
  166. lairs/records/_generated/annotation.py +424 -0
  167. lairs/records/_generated/changelog.py +144 -0
  168. lairs/records/_generated/corpus.py +319 -0
  169. lairs/records/_generated/defs.py +1075 -0
  170. lairs/records/_generated/eprint.py +445 -0
  171. lairs/records/_generated/expression.py +127 -0
  172. lairs/records/_generated/graph.py +333 -0
  173. lairs/records/_generated/judgment.py +506 -0
  174. lairs/records/_generated/media.py +302 -0
  175. lairs/records/_generated/ontology.py +197 -0
  176. lairs/records/_generated/persona.py +122 -0
  177. lairs/records/_generated/resource.py +474 -0
  178. lairs/records/_generated/segmentation.py +129 -0
  179. lairs/records/blobref.py +80 -0
  180. lairs/records/views.py +123 -0
  181. lairs/store/__init__.py +32 -0
  182. lairs/store/arrow.py +596 -0
  183. lairs/store/blobcache.py +228 -0
  184. lairs/store/pool.py +255 -0
  185. lairs/store/repository.py +595 -0
  186. lairs/tui/__init__.py +93 -0
  187. lairs/tui/app.py +193 -0
  188. lairs/tui/browse.py +234 -0
  189. lairs/tui/query.py +555 -0
  190. lairs/tui/registry.py +75 -0
  191. lairs/tui/screens/__init__.py +9 -0
  192. lairs/tui/screens/browse.py +200 -0
  193. lairs/tui/screens/explore.py +204 -0
  194. lairs/tui/screens/query.py +238 -0
  195. lairs/tui/styles.tcss +219 -0
  196. lairs/tui/views.py +964 -0
  197. lairs/tui/viz.py +967 -0
  198. lairs-0.1.0.dist-info/METADATA +216 -0
  199. lairs-0.1.0.dist-info/RECORD +202 -0
  200. lairs-0.1.0.dist-info/WHEEL +4 -0
  201. lairs-0.1.0.dist-info/entry_points.txt +17 -0
  202. lairs-0.1.0.dist-info/licenses/LICENSE +21 -0
lairs/__init__.py ADDED
@@ -0,0 +1,142 @@
1
+ """lairs: a read/write dataset client for the Layers format.
2
+
3
+ lairs reads and writes ``pub.layers.*`` records over ATProto, validates them
4
+ against models generated from the Layers lexicons, and exposes them through a
5
+ ``datasets``-like API with first-class tooling for audio, video, and neural
6
+ modalities.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from importlib.metadata import PackageNotFoundError
12
+ from importlib.metadata import version as _distribution_version
13
+ from typing import TYPE_CHECKING
14
+
15
+ from lairs.atproto.auth import Session, authed_client, login
16
+ from lairs.data import Corpus, load_corpus
17
+ from lairs.discovery import (
18
+ DatasetFilter,
19
+ DatasetSummary,
20
+ RepoTableOfContents,
21
+ discover_datasets,
22
+ list_datasets,
23
+ table_of_contents,
24
+ )
25
+ from lairs.integrations.registry import (
26
+ get_codec,
27
+ get_exporter,
28
+ get_knowledge_base,
29
+ )
30
+ from lairs.records.blobref import BlobRef
31
+
32
+ if TYPE_CHECKING:
33
+ from lairs.integrations.ports import Codec, Exporter, KnowledgeBase
34
+
35
+ __all__ = [
36
+ "BlobRef",
37
+ "Corpus",
38
+ "DatasetFilter",
39
+ "DatasetSummary",
40
+ "RepoTableOfContents",
41
+ "Session",
42
+ "__version__",
43
+ "authed_client",
44
+ "codec",
45
+ "discover_datasets",
46
+ "exporter",
47
+ "knowledge_base",
48
+ "list_datasets",
49
+ "load_corpus",
50
+ "login",
51
+ "table_of_contents",
52
+ ]
53
+
54
+ _FALLBACK_VERSION = "0.1.0"
55
+ """The version reported when no installed distribution metadata is available.
56
+
57
+ This literal is the single source of truth for source and editable trees where
58
+ ``importlib.metadata`` cannot find an installed ``lairs`` distribution. It must
59
+ be kept in step with the ``version`` field in ``pyproject.toml``.
60
+ """
61
+
62
+
63
def _resolve_version() -> str:
    """Resolve the version string exposed as ``lairs.__version__``.

    Returns
    -------
    str
        The version recorded in the installed ``lairs`` distribution
        metadata, or ``_FALLBACK_VERSION`` when no such distribution can be
        found.
    """
    try:
        installed = _distribution_version("lairs")
    except PackageNotFoundError:
        # no distribution metadata is available, so report the literal
        return _FALLBACK_VERSION
    return installed
77
+
78
+
79
+ __version__ = _resolve_version()
80
+
81
+
82
def codec(name: str) -> type[Codec]:
    """Return the codec adapter class registered under ``name``.

    Parameters
    ----------
    name : str
        The registry key of the codec (for example ``"conllu"`` or
        ``"brat"``).

    Returns
    -------
    type
        The codec class the registry holds for ``name``.

    Raises
    ------
    lairs.integrations.registry.UnknownAdapterError
        If the registry has no codec under ``name``.
    """
    adapter = get_codec(name)
    return adapter
101
+
102
+
103
def exporter(name: str) -> type[Exporter]:
    """Return the exporter adapter class registered under ``name``.

    Parameters
    ----------
    name : str
        The registry key of the exporter (for example ``"hf"`` or
        ``"torch"``).

    Returns
    -------
    type
        The exporter class the registry holds for ``name``.

    Raises
    ------
    lairs.integrations.registry.UnknownAdapterError
        If the registry has no exporter under ``name``.
    """
    adapter = get_exporter(name)
    return adapter
122
+
123
+
124
def knowledge_base(name: str) -> type[KnowledgeBase]:
    """Return the knowledge-base adapter class registered under ``name``.

    Parameters
    ----------
    name : str
        The registry key of the knowledge base (for example
        ``"wikidata"``).

    Returns
    -------
    type
        The knowledge-base class the registry holds for ``name``.

    Raises
    ------
    lairs.integrations.registry.UnknownAdapterError
        If the registry has no knowledge base under ``name``.
    """
    adapter = get_knowledge_base(name)
    return adapter
lairs/_aturi.py ADDED
@@ -0,0 +1,59 @@
1
+ """AT-URI parsing helpers shared across lairs.
2
+
3
+ Small, dependency-free helpers for pulling the authority and collection segments
4
+ out of an ``at://`` URI. Centralised here so the discovery, CLI, and data layers
5
+ parse AT-URIs the same way.
6
+
7
+ These helpers are positional string splitters, not validators. They assume a
8
+ well-formed ``at://authority/collection/rkey`` URI and return an empty string
9
+ for a missing segment; they do not check the ``at://`` scheme or the authority
10
+ shape, so malformed input yields a best-effort segment rather than an error.
11
+ Callers that need validation must do it before calling.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ __all__ = ["authority_of", "nsid_of"]
17
+
18
+ _AT_URI_PREFIX = "at://"
19
+ """The scheme prefix every AT-URI carries."""
20
+
21
+ _MIN_PARTS_WITH_COLLECTION = 2
22
+ """The number of path segments an AT-URI needs to carry a collection NSID."""
23
+
24
+
25
def authority_of(uri: str) -> str:
    """Extract the authority (DID or handle) segment from an AT-URI.

    Parameters
    ----------
    uri : str
        The AT-URI to split. The ``at://`` prefix is dropped when present;
        nothing else about the input is validated.

    Returns
    -------
    str
        Everything before the first ``/`` of the prefix-stripped URI, which
        is an empty string when nothing remains after stripping.
    """
    remainder = uri.removeprefix(_AT_URI_PREFIX)
    # partition yields "" for an empty remainder, matching the empty case
    authority, _, _ = remainder.partition("/")
    return authority
40
+
41
+
42
def nsid_of(uri: str) -> str:
    """Extract the collection NSID segment from an AT-URI.

    Parameters
    ----------
    uri : str
        The AT-URI to split. The ``at://`` prefix is dropped when present;
        nothing else about the input is validated.

    Returns
    -------
    str
        The second ``/``-separated segment of the prefix-stripped URI, or an
        empty string when the URI has fewer segments than that.
    """
    segments = uri.removeprefix(_AT_URI_PREFIX).split("/")
    if len(segments) < _MIN_PARTS_WITH_COLLECTION:
        return ""
    return segments[1]
lairs/_codegen/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ """Codegen pipeline that turns vendored lexicons into generated models.
2
+
3
+ The pipeline parses each lexicon into a panproto ``Schema``, walks it into
4
+ didactic spec dicts, builds models, and emits Python module text.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from lairs._codegen.emit import emit_module
10
+ from lairs._codegen.manifest import Manifest, load_manifest
11
+ from lairs._codegen.pipeline import check, generate, namespace_specs
12
+ from lairs._codegen.schema_to_spec import (
13
+ FieldSpec,
14
+ ModelSpec,
15
+ VariantSpec,
16
+ schema_to_specs,
17
+ )
18
+
19
+ __all__ = [
20
+ "FieldSpec",
21
+ "Manifest",
22
+ "ModelSpec",
23
+ "VariantSpec",
24
+ "check",
25
+ "emit_module",
26
+ "generate",
27
+ "load_manifest",
28
+ "namespace_specs",
29
+ "schema_to_specs",
30
+ ]
lairs/_codegen/emit.py ADDED
@@ -0,0 +1,450 @@
1
+ """Emit Python module text for generated models.
2
+
3
+ Renders :class:`~lairs._codegen.schema_to_spec.ModelSpec` value models into
4
+ committed module source text with a generated-by header and the source manifest
5
+ hash. Emission is deterministic (stable class ordering and stable field
6
+ ordering) so the ``lairs gen --check`` drift gate is meaningful. The emitted
7
+ modules are the import surface of :mod:`lairs.records`; they are rich didactic
8
+ models carrying descriptions, optionality, refined value types, integer ranges,
9
+ knownValues, and union discriminators, which the lossy spec-synthesis path could
10
+ not reconstruct.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import TYPE_CHECKING
16
+
17
+ if TYPE_CHECKING:
18
+ from collections.abc import Sequence
19
+
20
+ from lairs._codegen.schema_to_spec import FieldSpec, ModelSpec, VariantSpec
21
+
22
+ __all__ = ["emit_module"]
23
+
24
+ _HEADER_LINE = "# generated by lairs gen; do not edit"
25
+
26
+
27
def emit_module(
    specs: Sequence[ModelSpec],
    *,
    manifest_hash: str,
) -> str:
    """Render the specs of one namespace to Python module source text.

    Parameters
    ----------
    specs : collections.abc.Sequence of lairs._codegen.schema_to_spec.ModelSpec
        The record, object, and union specs to emit. They are re-ordered
        internally so local dependencies precede their referrers.
    manifest_hash : str
        The hash written into the generated-by header.

    Returns
    -------
    str
        The module text: header, module docstring, imports, ``__all__``, and
        the emitted classes, separated by blank lines and terminated by a
        single newline. Over-length lines carry an ``E501`` suppression.
    """
    ordered = _order_specs(specs)
    needs_datetime = any(map(_spec_uses_datetime, ordered))
    needs_literal = any(spec.is_union for spec in ordered)
    needs_blobref = any(map(_spec_uses_blobref, ordered))
    has_mixed_case = any(map(_spec_uses_mixed_case, ordered))

    # file-level suppressions, listed only when the module triggers them.
    # N815: attributes keep their camelCase wire keys. TC001/TC003: the
    # annotated imports must remain runtime imports rather than move under
    # TYPE_CHECKING.
    triggers = (
        ("N815", has_mixed_case),
        ("TC003", needs_datetime),
        ("TC001", needs_blobref),
    )
    codes: set[str] = {code for code, active in triggers if active}

    sections: list[str] = [
        _header(manifest_hash, codes),
        _module_docstring(ordered),
        _imports(
            uses_datetime=needs_datetime,
            uses_literal=needs_literal,
            uses_blobref=needs_blobref,
        ),
        _dunder_all(ordered),
        *(_class_text(spec) for spec in ordered),
    ]
    text = "\n\n".join(sections).rstrip()
    return _suppress_long_lines(text) + "\n"
81
+
82
+
83
+ _LINE_LIMIT = 88
84
+
85
+
86
def _suppress_long_lines(text: str) -> str:
    """Return ``text`` with every over-length line marked ``noqa: E501``.

    The text is split on newlines, each line is passed through
    ``_suppress_line``, and the results are joined back with newlines, so
    long content is kept in full rather than truncated.
    """
    patched = [_suppress_line(line) for line in text.split("\n")]
    return "\n".join(patched)
94
+
95
+
96
def _suppress_line(line: str) -> str:
    """Return ``line``, suffixed with an ``E501`` suppression if too long.

    A line is left untouched when it fits within ``_LINE_LIMIT`` or already
    contains a ``# noqa`` marker.
    """
    fits = len(line) <= _LINE_LIMIT
    if fits or "# noqa" in line:
        return line
    return f"{line} # noqa: E501"
101
+
102
+
103
def _order_specs(specs: Sequence[ModelSpec]) -> list[ModelSpec]:
    """Order specs deterministically with local dependencies first.

    Specs are walked in class-name order. Before a spec is placed, each of
    its same-namespace dependencies (as reported by ``_local_dependencies``)
    is placed depth-first, so a referenced class is emitted ahead of the
    class that refers to it. A spec already on the current walk path is
    skipped, which stops a reference cycle from recursing forever.
    """
    lookup = {spec.name: spec for spec in specs}
    result: list[ModelSpec] = []
    emitted: set[str] = set()

    def place(node: ModelSpec, active: frozenset[str]) -> None:
        name = node.name
        if name in emitted or name in active:
            return
        path = active | {name}
        for dependency in _local_dependencies(node, lookup):
            place(lookup[dependency], path)
        emitted.add(name)
        result.append(node)

    for candidate in sorted(specs, key=lambda spec: spec.name):
        place(candidate, frozenset())
    return result
127
+
128
+
129
def _local_dependencies(
    spec: ModelSpec,
    by_name: dict[str, ModelSpec],
) -> list[str]:
    """Return the sorted names in ``by_name`` that ``spec`` refers to.

    Both union-variant targets and field targets (direct or through an
    array element) are considered; targets absent from ``by_name`` are
    ignored.
    """
    found: set[str] = set()
    for variant in spec.variants:
        if variant.target in by_name:
            found.add(variant.target)
    for field in spec.fields:
        target = _field_local_target(field)
        if target is not None and target in by_name:
            found.add(target)
    return sorted(found)
142
+
143
+
144
+ def _field_local_target(field: FieldSpec) -> str | None:
145
+ """Return the same-namespace model a field embeds or unions, if any."""
146
+ if field.type_kind in {"embed", "union"}:
147
+ return field.target
148
+ if field.type_kind == "array" and field.item is not None:
149
+ return _field_local_target(field.item)
150
+ return None
151
+
152
+
153
def _header(manifest_hash: str, noqa_codes: set[str]) -> str:
    """Build the generated-by header for an emitted module.

    The header is the fixed generated-by line, then a comment carrying the
    lexicon tree hash, then a file-level ``ruff: noqa`` directive when
    ``noqa_codes`` is non-empty. The hash comment is phrased as a bare token
    with no colon so it does not read as commented-out code to ruff.
    """
    suppression = _noqa_directive(noqa_codes)
    trailer = [] if suppression is None else [suppression]
    return "\n".join(
        [_HEADER_LINE, f"# lexicon tree hash {manifest_hash}", *trailer]
    )
166
+
167
+
168
+ def _noqa_directive(noqa_codes: set[str]) -> str | None:
169
+ """Return a file-level ruff suppression line, or ``None`` when empty."""
170
+ if not noqa_codes:
171
+ return None
172
+ listed = ", ".join(sorted(noqa_codes))
173
+ return f"# ruff: noqa: {listed}"
174
+
175
+
176
def _module_docstring(specs: Sequence[ModelSpec]) -> str:
    """Return the docstring source for an emitted module.

    The first line names the namespace computed by ``_shared_namespace``;
    the remaining lines are fixed text.
    """
    namespace = _shared_namespace(specs)
    rows = (
        f'"""Generated models for the {namespace} lexicon namespace.',
        "",
        "This module is emitted by ``lairs gen`` from the vendored lexicons and",
        "must not be edited by hand. Each class mirrors a lexicon record, object,",
        "or union definition.",
        '"""',
    )
    return "\n".join(rows)
185
+
186
+
187
+ def _shared_namespace(specs: Sequence[ModelSpec]) -> str:
188
+ """Return the namespace shared by a sequence of specs.
189
+
190
+ Records and objects in one emitted module all share their first three
191
+ dotted nsid components (for example ``pub.layers.annotation``); the module
192
+ docstring names that common prefix rather than each file's nsid.
193
+ """
194
+ nsids = sorted({spec.nsid for spec in specs})
195
+ if not nsids:
196
+ return "pub.layers"
197
+ prefix_parts = nsids[0].split(".")[:3]
198
+ return ".".join(prefix_parts)
199
+
200
+
201
+ def _imports(*, uses_datetime: bool, uses_literal: bool, uses_blobref: bool) -> str:
202
+ """Return the import block for the emitted module.
203
+
204
+ The generated modules deliberately omit ``from __future__ import
205
+ annotations``: didactic resolves field annotations eagerly at class
206
+ creation, so the annotated imports must be live runtime names, and keeping
207
+ them eager also lets ruff see them as used rather than typing-only.
208
+ """
209
+ stdlib: list[str] = []
210
+ if uses_datetime:
211
+ stdlib.append("from datetime import datetime")
212
+ if uses_literal:
213
+ stdlib.append("from typing import Literal")
214
+ lines: list[str] = []
215
+ if stdlib:
216
+ lines.extend(stdlib)
217
+ lines.append("")
218
+ lines.append("import didactic.api as dx")
219
+ if uses_blobref:
220
+ lines.append("")
221
+ lines.append("from lairs.records.blobref import BlobRef")
222
+ return "\n".join(lines)
223
+
224
+
225
def _dunder_all(specs: Sequence[ModelSpec]) -> str:
    """Build the ``__all__`` assignment for an emitted module.

    Every class name reported by ``_emitted_names`` is listed in sorted
    order, one quoted entry per line. With no names an annotated empty list
    is emitted instead.
    """
    names = sorted(_emitted_names(specs))
    if not names:
        return "__all__: list[str] = []"
    entries = [f'"{name}"' for name in names]
    return "__all__ = [\n " + ",\n ".join(entries) + ",\n]"
232
+
233
+
234
+ def _emitted_names(specs: Sequence[ModelSpec]) -> list[str]:
235
+ """Return every class name a spec sequence emits, variants included."""
236
+ names: list[str] = []
237
+ for spec in specs:
238
+ names.append(spec.name)
239
+ names.extend(variant.class_name for variant in spec.variants)
240
+ return names
241
+
242
+
243
def _class_text(spec: ModelSpec) -> str:
    """Render one spec, dispatching on whether it is a union.

    Union specs go through ``_union_text``; everything else goes through
    ``_model_text``.
    """
    return _union_text(spec) if spec.is_union else _model_text(spec)
248
+
249
+
250
def _model_text(spec: ModelSpec) -> str:
    """Render a non-union spec as a ``dx.Model`` subclass definition.

    The class docstring is the spec description, or a fallback sentence
    naming ``spec.def_name`` when the description is empty or ``None``. The
    field declarations follow in the order ``spec.fields`` gives them.
    """
    summary = spec.description or f"The {spec.def_name} definition."
    rows = [f"class {spec.name}(dx.Model):", _class_docstring(summary), ""]
    rows.extend(row for field in spec.fields for row in _field_lines(field))
    return "\n".join(rows)
257
+
258
+
259
def _union_text(spec: ModelSpec) -> str:
    """Render a union spec as a ``dx.TaggedUnion`` root plus its variants.

    The discriminator defaults to ``"kind"`` when the spec carries none. The
    root class and each variant subclass are separated by two blank lines.
    """
    tag = spec.discriminator or "kind"
    summary = spec.description or f"The {spec.def_name} union."
    root_rows = [
        f'class {spec.name}(dx.TaggedUnion, discriminator="{tag}"):',
        _class_docstring(summary),
        "",
    ]
    family = ["\n".join(root_rows)]
    for variant in spec.variants:
        family.append(_variant_text(spec, variant, tag))
    return "\n\n\n".join(family)
274
+
275
+
276
def _variant_text(
    spec: ModelSpec,
    variant: VariantSpec,
    discriminator: str,
) -> str:
    """Render one union variant as a subclass of the union root.

    The subclass pins ``discriminator`` to the variant's discriminator value
    through a ``Literal`` annotation with a matching default, and declares an
    optional ``value`` field typed as the variant target.
    """
    tag_value = variant.discriminator_value
    summary = f"The {tag_value!r} member of {spec.name}."
    rows = [
        f"class {variant.class_name}({spec.name}):",
        _class_docstring(summary),
        "",
        f" {discriminator}: Literal[{tag_value!r}] = dx.field(",
        f" default={tag_value!r},",
        f' description="discriminator pinning this member to {tag_value}",',
        " )",
        f" value: {variant.target} | None = dx.field(",
        " default=None,",
        ' description="the wrapped member model",',
        " )",
    ]
    return "\n".join(rows)
301
+
302
+
303
def _field_lines(field: FieldSpec) -> list[str]:
    """Render one field declaration as a list of source lines.

    The attribute keeps ``field.name`` verbatim, so camelCase lexicon keys
    stay camelCase in the generated class. The declaration opens a
    ``dx.field(`` call, lists one keyword argument per line, and closes the
    call on its own line.
    """
    opening = f" {field.name}: {_field_annotation(field)} = dx.field("
    arguments = [f" {arg}" for arg in _field_args(field)]
    return [opening, *arguments, " )"]
316
+
317
+
318
+ def _is_mixed_case(name: str) -> bool:
319
+ """Return whether a field name is mixedCase (carries an inner capital).
320
+
321
+ A purely lowercase or snake_case name does not trip ``N815``; only names
322
+ with an interior upper-case letter do.
323
+ """
324
+ return name != name.lower() and "_" not in name
325
+
326
+
327
def _field_annotation(field: FieldSpec) -> str:
    """Return the annotation source for a field, optional when not required.

    A required field gets the bare base annotation; any other field gets the
    base annotation unioned with ``None``.
    """
    annotation = _base_annotation(field)
    return annotation if field.required else f"{annotation} | None"
333
+
334
+
335
+ # scalar field kinds whose python annotation is a fixed builtin or alias.
336
+ _SCALAR_ANNOTATIONS: dict[str, str] = {
337
+ "str": "str",
338
+ "int": "int",
339
+ "bool": "bool",
340
+ "datetime": "datetime",
341
+ "bytes": "bytes",
342
+ "blob": "BlobRef",
343
+ }
344
+
345
+
346
def _base_annotation(field: FieldSpec) -> str:
    """Return the annotation source for a field, ignoring optionality.

    Arrays become a variadic ``tuple`` of the element annotation (``str``
    when no element spec is present), embeds become ``dx.Embed[...]``, unions
    use the target name directly, and every other kind is looked up in
    ``_SCALAR_ANNOTATIONS`` with ``str`` as the fallback.
    """
    kind = field.type_kind
    if kind == "array":
        element = "str" if field.item is None else _base_annotation(field.item)
        return f"tuple[{element}, ...]"
    if kind == "embed":
        return f"dx.Embed[{field.target}]"
    if kind == "union":
        return f"{field.target}"
    return _SCALAR_ANNOTATIONS.get(kind, "str")
358
+
359
+
360
def _field_args(field: FieldSpec) -> list[str]:
    """Return the keyword-argument source lines for a ``dx.field`` call.

    Arguments appear in a fixed order: the default (non-required fields
    only), the description, the extras mapping, and ``opaque=True`` for
    bytes fields. When none of those apply a placeholder description is
    returned instead of an empty list.
    """
    rows: list[str] = []
    if not field.required:
        is_array = field.type_kind == "array"
        rows.append("default_factory=tuple," if is_array else "default=None,")
    if field.description is not None:
        rows.append(f"description={_py_str(field.description)},")
    if extras := _field_extras(field):
        rows.append(f"extras={{{extras}}},")
    if field.type_kind == "bytes":
        rows.append("opaque=True,")
    # a required field with nothing else to say still gets one argument
    return rows or ['description="generated field",']
380
+
381
+
382
def _field_extras(field: FieldSpec) -> str:
    """Return the inner text of a field's ``extras`` mapping literal.

    Entries are emitted in a fixed order (format, knownValues, minimum,
    maximum, minLength, maxLength) and only for constraints that are set.
    The result is an empty string when the field carries none of them.
    """
    entries: list[str] = []
    if field.string_format is not None:
        entries.append(f'"format": {_py_str(field.string_format)}')
    if field.known_values:
        joined = ", ".join(map(_py_str, field.known_values))
        entries.append(f'"knownValues": ({joined},)')
    bounds = (
        ("minimum", field.minimum),
        ("maximum", field.maximum),
        ("minLength", field.min_length),
        ("maxLength", field.max_length),
    )
    entries.extend(f'"{key}": {value}' for key, value in bounds if value is not None)
    return ", ".join(entries)
399
+
400
+
401
def _class_docstring(summary: str) -> str:
    """Return an indented one-line class docstring for ``summary``.

    Whitespace runs in the summary (newlines included) collapse to single
    spaces, a trailing period is added when missing, and the text is escaped
    for use inside a triple-quoted literal.
    """
    collapsed = " ".join(summary.split())
    sentence = collapsed if collapsed.endswith(".") else f"{collapsed}."
    return f' """{_escape_docstring(sentence)}"""'
407
+
408
+
409
+ def _escape_docstring(text: str) -> str:
410
+ """Escape a docstring body so it is a valid triple-quoted literal."""
411
+ return text.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')
412
+
413
+
414
+ def _py_str(value: str) -> str:
415
+ """Return a python source string literal for ``value``."""
416
+ escaped = value.replace("\\", "\\\\").replace('"', '\\"')
417
+ return f'"{escaped}"'
418
+
419
+
420
def _spec_uses_datetime(spec: ModelSpec) -> bool:
    """Return whether a spec has at least one datetime-typed field."""
    for field in spec.fields:
        if _field_uses_datetime(field):
            return True
    return False
423
+
424
+
425
+ def _field_uses_datetime(field: FieldSpec) -> bool:
426
+ """Return whether a field (or its array element) is datetime-typed."""
427
+ if field.type_kind == "datetime":
428
+ return True
429
+ if field.type_kind == "array" and field.item is not None:
430
+ return _field_uses_datetime(field.item)
431
+ return False
432
+
433
+
434
def _spec_uses_mixed_case(spec: ModelSpec) -> bool:
    """Return whether a spec has at least one field with a mixedCase name."""
    for field in spec.fields:
        if _is_mixed_case(field.name):
            return True
    return False
437
+
438
+
439
def _spec_uses_blobref(spec: ModelSpec) -> bool:
    """Return whether a spec has at least one blob-reference field."""
    for field in spec.fields:
        if _field_uses_blobref(field):
            return True
    return False
442
+
443
+
444
+ def _field_uses_blobref(field: FieldSpec) -> bool:
445
+ """Return whether a field (or its array element) is a blob reference."""
446
+ if field.type_kind == "blob":
447
+ return True
448
+ if field.type_kind == "array" and field.item is not None:
449
+ return _field_uses_blobref(field.item)
450
+ return False