json2vec 0.4.6__tar.gz → 0.4.8__tar.gz

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
Files changed (80)
  1. {json2vec-0.4.6 → json2vec-0.4.8}/NOTICE +1 -1
  2. json2vec-0.4.8/PKG-INFO +398 -0
  3. json2vec-0.4.8/README.md +359 -0
  4. {json2vec-0.4.6 → json2vec-0.4.8}/pyproject.toml +13 -6
  5. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/__init__.py +18 -12
  6. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/contracts.py +5 -4
  7. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/mutations.py +54 -9
  8. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/root.py +51 -28
  9. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/data/datasets/__init__.py +5 -0
  10. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/data/datasets/base.py +8 -0
  11. json2vec-0.4.8/src/json2vec/data/datasets/custom.py +331 -0
  12. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/data/datasets/polars.py +11 -2
  13. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/data/datasets/streaming.py +31 -15
  14. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/data/iterables.py +57 -4
  15. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/data/processing.py +62 -11
  16. json2vec-0.4.8/src/json2vec/helpers/hyperparameters.py +0 -0
  17. json2vec-0.4.8/src/json2vec/helpers/optimizers.py +78 -0
  18. json2vec-0.4.8/src/json2vec/helpers/trainer.py +0 -0
  19. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/inference/__init__.py +5 -11
  20. json2vec-0.4.8/src/json2vec/inference/deployment.py +691 -0
  21. json2vec-0.4.8/src/json2vec/structs/__init__.py +0 -0
  22. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/structs/enums.py +6 -1
  23. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/structs/experiment.py +49 -14
  24. json2vec-0.4.8/src/json2vec/structs/structure.py +110 -0
  25. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/structs/tree.py +159 -2
  26. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/base.py +19 -39
  27. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/extensions/category.py +52 -57
  28. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/extensions/dateparts.py +2 -0
  29. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/extensions/entity.py +2 -0
  30. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/extensions/number.py +44 -41
  31. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/extensions/set.py +42 -98
  32. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/extensions/text.py +2 -0
  33. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/extensions/vector.py +2 -0
  34. json2vec-0.4.8/src/json2vec/tensorfields/shared/__init__.py +80 -0
  35. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/shared/counter.py +3 -1
  36. json2vec-0.4.8/src/json2vec/tensorfields/shared/vocabulary.py +436 -0
  37. json2vec-0.4.8/src/json2vec.egg-info/PKG-INFO +398 -0
  38. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec.egg-info/SOURCES.txt +6 -1
  39. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec.egg-info/requires.txt +6 -3
  40. {json2vec-0.4.6 → json2vec-0.4.8}/tests/test_callbacks.py +6 -30
  41. json2vec-0.4.8/tests/test_optimizers.py +78 -0
  42. {json2vec-0.4.6 → json2vec-0.4.8}/tests/test_public_api.py +3 -3
  43. json2vec-0.4.6/PKG-INFO +0 -358
  44. json2vec-0.4.6/README.md +0 -322
  45. json2vec-0.4.6/src/json2vec/architecture/plot.py +0 -562
  46. json2vec-0.4.6/src/json2vec/inference/deployment.py +0 -390
  47. json2vec-0.4.6/src/json2vec/structs/structure.py +0 -58
  48. json2vec-0.4.6/src/json2vec/tensorfields/shared/__init__.py +0 -12
  49. json2vec-0.4.6/src/json2vec/tensorfields/shared/vocabulary.py +0 -283
  50. json2vec-0.4.6/src/json2vec.egg-info/PKG-INFO +0 -358
  51. {json2vec-0.4.6 → json2vec-0.4.8}/LICENSE +0 -0
  52. {json2vec-0.4.6 → json2vec-0.4.8}/setup.cfg +0 -0
  53. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/__init__.py +0 -0
  54. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/attention.py +0 -0
  55. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/checkpoint.py +0 -0
  56. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/encoder.py +0 -0
  57. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/graph.py +0 -0
  58. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/node.py +0 -0
  59. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/pool.py +0 -0
  60. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/rotary.py +0 -0
  61. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/architecture/runtime.py +0 -0
  62. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/data/__init__.py +0 -0
  63. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/distributed.py +0 -0
  64. {json2vec-0.4.6/src/json2vec/structs → json2vec-0.4.8/src/json2vec/helpers}/__init__.py +0 -0
  65. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/inference/callback.py +0 -0
  66. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/logging/__init__.py +0 -0
  67. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/logging/config.py +0 -0
  68. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/logging/epoch.py +0 -0
  69. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/logging/throughput.py +0 -0
  70. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/preprocessors/__init__.py +0 -0
  71. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/preprocessors/base.py +0 -0
  72. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/preprocessors/extensions/__init__.py +0 -0
  73. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/preprocessors/spec.py +0 -0
  74. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/structs/packages.py +0 -0
  75. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/structs/selectors.py +0 -0
  76. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/__init__.py +0 -0
  77. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/extensions/__init__.py +0 -0
  78. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec/tensorfields/spec.py +0 -0
  79. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec.egg-info/dependency_links.txt +0 -0
  80. {json2vec-0.4.6 → json2vec-0.4.8}/src/json2vec.egg-info/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  JSON2Vec
2
- Copyright 2026 Grantham Taylor
2
+ Copyright 2024-2026 Grantham Taylor
3
3
 
4
4
  This project is licensed under the Apache License, Version 2.0.
5
5
  You may obtain a copy of the License at:
@@ -0,0 +1,398 @@
1
+ Metadata-Version: 2.4
2
+ Name: json2vec
3
+ Version: 0.4.8
4
+ Summary: Schema-first PyTorch models for hierarchical / nested / sequence data structures
5
+ License-Expression: Apache-2.0
6
+ Requires-Python: >=3.12
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ License-File: NOTICE
10
+ Requires-Dist: beartype>=0.21.0
11
+ Requires-Dist: pluggy>=1.6.0
12
+ Requires-Dist: rich>=14.0.0
13
+ Requires-Dist: pydantic>=2.11.7
14
+ Requires-Dist: jmespath>=1.0.1
15
+ Requires-Dist: loguru>=0.7.3
16
+ Requires-Dist: anytree>=2.13.0
17
+ Requires-Dist: pyarrow>=21.0.0
18
+ Requires-Dist: polars>=1.35.2
19
+ Requires-Dist: numpy>=2.2.6
20
+ Requires-Dist: lightning>=2.6.4
21
+ Requires-Dist: tensordict>=0.10.0
22
+ Requires-Dist: torch>=2.7.1
23
+ Provides-Extra: serving
24
+ Requires-Dist: fastapi>=0.124.0; extra == "serving"
25
+ Requires-Dist: orjson>=3.10.0; extra == "serving"
26
+ Requires-Dist: pydantic-settings>=2.10.1; extra == "serving"
27
+ Requires-Dist: uvicorn>=0.38.0; extra == "serving"
28
+ Provides-Extra: text
29
+ Requires-Dist: transformers>=4.55.0; extra == "text"
30
+ Provides-Extra: docs
31
+ Requires-Dist: fastapi>=0.124.0; extra == "docs"
32
+ Requires-Dist: mkdocs-material>=9.6; extra == "docs"
33
+ Requires-Dist: mkdocs-jupyter>=0.26.3; extra == "docs"
34
+ Requires-Dist: mkdocstrings[python]>=0.27; extra == "docs"
35
+ Requires-Dist: orjson>=3.10.0; extra == "docs"
36
+ Requires-Dist: pydantic-settings>=2.10.1; extra == "docs"
37
+ Requires-Dist: uvicorn>=0.38.0; extra == "docs"
38
+ Dynamic: license-file
39
+
40
+ <h1 align="center"><code>json2vec</code></h1>
41
+
42
+ <p align="center">
43
+ <img alt="Python 3.12+" src="https://img.shields.io/badge/python-3.12%2B-3776AB?logo=python&amp;logoColor=white" />
44
+ <a href="LICENSE"><img alt="Apache-2.0 license" src="https://img.shields.io/badge/license-Apache--2.0-2E8B57" /></a>
45
+ <a href="https://json2vec.github.io/json2vec/"><img alt="Documentation" src="https://img.shields.io/badge/docs-MkDocs-526CFE?logo=materialformkdocs&amp;logoColor=white" /></a>
46
+ <!-- discord-invite:start -->
47
+ <a href="https://discord.gg/DVyZUkvTFA"><img alt="Discord channel invite" src="https://img.shields.io/badge/discord-join%20the%20channel-5865F2?logo=discord&amp;logoColor=white" /></a>
48
+ <!-- discord-invite:end -->
49
+ </p>
50
+
51
+ `json2vec` builds PyTorch/Lightning models directly from JSON-like schemas.
52
+ It is meant for predictive modeling on records that are not naturally flat:
53
+ customers with transactions, orders with line items, sessions with clickstream
54
+ events, devices recurring across histories, and mixed datatypes at every level.
55
+
56
+ Most ML pipelines flatten that shape first, then train on one fixed feature
57
+ row. `json2vec` takes the opposite path: describe the structured record, and
58
+ the schema becomes the model.
59
+
60
+ ## Core Idea
61
+
62
+ A `json2vec` schema is both a data contract and an architecture blueprint.
63
+
64
+ - Leaf fields such as `Number`, `Category`, `Set`, `Entity`, `Text`, and
65
+ `Vector` become datatype-specific tensorfields.
66
+ - `Array` nodes become local context encoders for repeated child objects.
67
+ - Targets, masks, pruning, and embeddings are configured on the same schema
68
+ tree.
69
+ - Prediction output is keyed by schema address, so decoded values and
70
+ embeddings remain attached to the part of the record that produced them.
71
+
72
+ That gives one model surface for supervised prediction, masked reconstruction,
73
+ unsupervised embedding workflows, schema mutation, field importance, batch
74
+ inference, and serving.
75
+
76
+ ## A Model From A Nested Record
77
+
78
+ ```python
79
+ import json2vec as j2v
80
+
81
+ model = j2v.Model.from_schema(
82
+ j2v.Category("customer_tier", max_vocab_size=16),
83
+ j2v.Array(
84
+ j2v.Category("sku", max_vocab_size=2048),
85
+ j2v.Number("quantity"),
86
+ j2v.Number("price"),
87
+ name="line_items",
88
+ max_length=32,
89
+ embed=True,
90
+ ),
91
+ j2v.Category("returned", target=True, max_vocab_size=2),
92
+ name="order",
93
+ d_model=64,
94
+ n_layers=2,
95
+ n_heads=4,
96
+ embed=True,
97
+ )
98
+ ```
99
+
100
+ This model reads records shaped like:
101
+
102
+ ```python
103
+ {
104
+ "customer_tier": "gold",
105
+ "line_items": [
106
+ {"sku": "A12", "quantity": 2, "price": 19.99},
107
+ {"sku": "B07", "quantity": 1, "price": 45.50},
108
+ ],
109
+ "returned": "false",
110
+ }
111
+ ```
112
+
113
+ The `line_items` branch has its own repeated context, `returned` is withheld
114
+ from input and decoded as a supervised target, and `embed=True` asks prediction
115
+ to emit embeddings at configured addresses.
116
+
117
+ ## Train With Lightning
118
+
119
+ `j2v.Model` is a LightningModule. `j2v.PolarsDataModule` and
120
+ `j2v.StreamingDataModule` are LightningDataModule implementations. The schema
121
+ defines the model tree, typed losses, prediction outputs, and embeddings;
122
+ Lightning runs `fit`, `validate`, `test`, and `predict`.
123
+
124
+ ```python
125
+ import lightning.pytorch as lit
126
+ import polars as pl
127
+ import torch
128
+
129
+ import json2vec as j2v
130
+
131
+ records = pl.read_ndjson("docs/data/iris.jsonl").head(36)
132
+
133
+ model = j2v.Model.from_schema(
134
+ j2v.Number("sepal_length"),
135
+ j2v.Number("petal_length"),
136
+ j2v.Category("species", target=True, max_vocab_size=4, topk=[2]),
137
+ d_model=16,
138
+ n_layers=1,
139
+ n_heads=4,
140
+ batch_size=8,
141
+ embed=True,
142
+ optimizer=lambda module: torch.optim.AdamW(module.parameters(), lr=1e-2),
143
+ )
144
+
145
+ datamodule = j2v.PolarsDataModule(
146
+ model=model,
147
+ train=records,
148
+ validate=records,
149
+ num_workers=0,
150
+ persistent_workers=False,
151
+ pin_memory=False,
152
+ observation_buffer_size=32,
153
+ sample_rate=1.0,
154
+ )
155
+
156
+ trainer = lit.Trainer(
157
+ accelerator="cpu",
158
+ max_epochs=1,
159
+ logger=False,
160
+ enable_progress_bar=False,
161
+ enable_model_summary=False,
162
+ enable_checkpointing=False,
163
+ limit_train_batches=1,
164
+ limit_val_batches=1,
165
+ )
166
+
167
+ trainer.fit(model=model, datamodule=datamodule)
168
+ ```
169
+
170
+ For larger jobs, the same model can run through normal Lightning callbacks,
171
+ checkpointing, precision settings, device placement, and distributed
172
+ strategies. See
173
+ [Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning/).
174
+
175
+ ## Predict And Embed
176
+
177
+ For small interactive batches, call `model.predict(...)` with raw dictionaries.
178
+
179
+ ```python
180
+ predictions = model.predict(records.to_dicts()[:3])
181
+
182
+ species = predictions[j2v.Address("record", "species")]
183
+ record = predictions[j2v.Address("record")]
184
+
185
+ print(species["content"]["value"])
186
+ print(species["content"]["probability"])
187
+ print(record["embedding"])
188
+ ```
189
+
190
+ For larger offline jobs, configure a `predict` split on a data module and attach
191
+ `j2v.Writer` to Lightning's prediction loop.
192
+
193
+ ```python
194
+ writer = j2v.Writer("predictions")
195
+
196
+ trainer = lit.Trainer(
197
+ accelerator="cpu",
198
+ callbacks=[writer],
199
+ logger=False,
200
+ )
201
+
202
+ predict_datamodule = j2v.PolarsDataModule(
203
+ model=model,
204
+ predict=records.drop("species"),
205
+ num_workers=0,
206
+ persistent_workers=False,
207
+ pin_memory=False,
208
+ )
209
+
210
+ trainer.predict(model=model, datamodule=predict_datamodule)
211
+ ```
212
+
213
+ `Writer` creates rank-partitioned Parquet files such as
214
+ `predictions/rank-0.parquet`. Use a postprocessor when downstream systems need
215
+ flat columns, renamed addresses, redacted payloads, or fewer fields. See
216
+ [Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference/)
217
+ and [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors/).
218
+
219
+ ## Learning Modes
220
+
221
+ `json2vec` does not maintain separate supervised and self-supervised code
222
+ paths. Supervised learning is the special case where a target field is hidden
223
+ from the input 100% of the time and decoded from the remaining context.
224
+
225
+ | Setting | What the model sees | What prediction can emit |
226
+ | --- | --- | --- |
227
+ | plain input | value is visible | no decoded output unless otherwise configured |
228
+ | `target=True` | value is hidden | decoded supervised output |
229
+ | `p_mask` | some observed values are hidden during training | decoded reconstruction |
230
+ | `p_prune` | whole leaf instances are hidden during training | decoded reconstruction |
231
+ | `embed=True` | does not hide the value | embedding at that address |
232
+
233
+ `target=True` is exact shorthand for `p_prune=1.0`. Use `p_mask` for stochastic
234
+ value-level reconstruction with rates lower than `1.0`. Use `embed=True` when
235
+ you want a representation returned from prediction.
236
+
237
+ ## Data Modules
238
+
239
+ Data modules load raw records, apply optional preprocessing, batch
240
+ observations, tensorize values from the model schema, apply training-time
241
+ masking and target pruning, and hand encoded batches to Lightning.
242
+
243
+ Choose the data module by where the records live:
244
+
245
+ | Use case | Module |
246
+ | --- | --- |
247
+ | Tutorials, tests, notebooks, in-memory Polars frames | `PolarsDataModule` |
248
+ | Many local files | `StreamingDataModule` |
249
+ | S3-backed datasets | `StreamingDataModule` |
250
+ | Distributed training or prediction over large inputs | `StreamingDataModule` |
251
+
252
+ `StreamingDataModule` supports local paths and `s3://...` roots with `ndjson`,
253
+ `parquet`, `feather`, `avro`, `csv`, `orc`, and `json` suffixes. Split
254
+ arguments are compiled regular expressions matched against discovered file
255
+ paths.
256
+
257
+ See [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules/)
258
+ for split configuration, sharding, sampling, buffers, and preprocessors.
259
+
260
+ ## What Makes This Different
261
+
262
+ - **Hierarchical context encoding:** child records interact locally before
263
+ their representation flows upward.
264
+ - **Extensible datatypes:** each field type owns validation, tensorization,
265
+ missing-state handling, masking, decoding, loss, metrics, and output writing.
266
+ - **Unified training roles:** `target=True`, `p_prune`, and `p_mask` all use the
267
+ same reconstruction path.
268
+ - **Embedding trees:** embeddings can come from the root, arrays, or selected
269
+ leaves.
270
+ - **Schema evolution:** fields can be added, removed, updated, reset, or
271
+ temporarily overridden after construction.
272
+ - **Production missingness semantics:** `null`, `padded`, `masked`, and
273
+ `valued` are distinct tensorfield states.
274
+ - **Training-serving parity:** queries, preprocessors, tensorization, model
275
+ execution, prediction writing, and postprocessors stay on the same configured
276
+ path.
277
+
278
+ ## Where It Fits
279
+
280
+ Use `json2vec` when relationships inside the record matter: account histories,
281
+ fraud or risk snapshots, order and fulfillment events, flight itineraries,
282
+ operations telemetry, user sessions, repeated measurements, or mixed datatype
283
+ objects where flattening would discard useful structure.
284
+
285
+ Use a simpler tabular model when flattening loses no meaningful context. The
286
+ point is not to replace every table. The point is to model nested business data
287
+ without making a feature table the only representation the model can see.
288
+
289
+ ## What It Does Not Do
290
+
291
+ `json2vec` stops at the representation and typed prediction layer. It is not a
292
+ feature store, governance system, rule engine, authorization layer,
293
+ decision-capture system, or audit platform. Those systems can consume
294
+ `json2vec` embeddings and predictions, but their policies and operational
295
+ controls remain separate concerns.
296
+
297
+ The open-source layer is the reusable encoder and runtime infrastructure. It
298
+ does not require users to publish data, schemas, checkpoints, or model
299
+ parameters.
300
+
301
+ ## Install
302
+
303
+ For local development:
304
+
305
+ ```bash
306
+ uv sync
307
+ ```
308
+
309
+ The package requires Python `>=3.12`.
310
+
311
+ Optional extras:
312
+
313
+ ```bash
314
+ uv sync --extra text
315
+ uv sync --extra serving
316
+ uv sync --extra docs
317
+ ```
318
+
319
+ The `text` extra installs Hugging Face `transformers`. The `serving` extra
320
+ installs FastAPI-backed deployment dependencies. The `docs` extra installs the
321
+ MkDocs toolchain plus the same packages as the `serving` extra.
322
+
323
+ ## Documentation Map
324
+
325
+ Start with:
326
+
327
+ - [Getting Started](https://json2vec.github.io/json2vec/getting-started/)
328
+ - [AI / Expert Quickstart](https://json2vec.github.io/json2vec/ai-quickstart/)
329
+ - [Model Tree](https://json2vec.github.io/json2vec/core-concepts/model-tree/)
330
+ - [Query Paths](https://json2vec.github.io/json2vec/core-concepts/querypaths/)
331
+ - [Built-In Data Types](https://json2vec.github.io/json2vec/core-concepts/data-types/)
332
+ - [Learning Modes & Embeddings](https://json2vec.github.io/json2vec/core-concepts/embeddings/)
333
+ - [Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning/)
334
+ - [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules/)
335
+ - [Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference/)
336
+ - [API Reference](https://json2vec.github.io/json2vec/reference/api/)
337
+
338
+ Tutorials and guides:
339
+
340
+ - [Hello World](https://json2vec.github.io/json2vec/tutorials/hello-world/)
341
+ - [Supervised Tabular Training](https://json2vec.github.io/json2vec/tutorials/supervised-tabular-training/)
342
+ - [Masked Pretraining](https://json2vec.github.io/json2vec/tutorials/pretraining/)
343
+ - [Nested Supervised Training](https://json2vec.github.io/json2vec/tutorials/nested-supervised-training/)
344
+ - [Serving](https://json2vec.github.io/json2vec/tutorials/serving/)
345
+ - [Preprocessors](https://json2vec.github.io/json2vec/guides/preprocessors/)
346
+ - [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors/)
347
+ - [Field Importance](https://json2vec.github.io/json2vec/guides/field-importance/)
348
+ - [Field Stacking](https://json2vec.github.io/json2vec/guides/field-stacking/)
349
+ - [Custom Data Types](https://json2vec.github.io/json2vec/data-types/tensorfields/)
350
+ - [Device Tenure Case Study](https://json2vec.github.io/json2vec/case-studies/device-tenure/)
351
+ - [Whitepaper](https://json2vec.github.io/json2vec/whitepaper.pdf)
352
+
353
+ Build the docs locally with:
354
+
355
+ ```bash
356
+ uv run --extra docs mkdocs build --strict
357
+ ```
358
+
359
+ ## Repository Layout
360
+
361
+ - `src/json2vec/architecture`: model assembly, attention, pooling, and routing
362
+ - `src/json2vec/data`: dataset fetch/read/process/batch/encode pipeline
363
+ - `src/json2vec/inference`: serving and prediction callbacks
364
+ - `src/json2vec/logging`: runtime logging callbacks
365
+ - `src/json2vec/preprocessors`: preprocessor registry
366
+ - `src/json2vec/structs`: pydantic config models, enums, and tree nodes
367
+ - `src/json2vec/tensorfields`: tensorfield plugin system and built-in fields
368
+ - `tests/`: package test suite
369
+ - `docs/`: tutorials, guides, diagrams, and whitepaper source
370
+
371
+ ## Development
372
+
373
+ Run tests:
374
+
375
+ ```bash
376
+ uv run pytest
377
+ ```
378
+
379
+ Run type and lint checks:
380
+
381
+ ```bash
382
+ uv run ty check src/json2vec --output-format concise
383
+ uv run ruff check
384
+ ```
385
+
386
+ ## Community
387
+
388
+ Join the [`json2vec` Discord](https://discord.gg/DVyZUkvTFA) for questions,
389
+ design discussion, and release notes.
390
+
391
+ ## License
392
+
393
+ Licensed under the Apache License, Version 2.0. See `LICENSE` and `NOTICE`.
394
+
395
+ ## References
396
+
397
+ - `BIBLIOGRAPHY.md`
398
+ - `CITATION.bib`