json2vec 0.4.5__tar.gz → 0.4.7__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {json2vec-0.4.5 → json2vec-0.4.7}/NOTICE +1 -1
  2. json2vec-0.4.7/PKG-INFO +395 -0
  3. json2vec-0.4.7/README.md +359 -0
  4. {json2vec-0.4.5 → json2vec-0.4.7}/pyproject.toml +4 -2
  5. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/__init__.py +74 -3
  6. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/checkpoint.py +1 -1
  7. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/encoder.py +1 -1
  8. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/plot.py +2 -3
  9. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/root.py +22 -65
  10. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/runtime.py +52 -44
  11. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/data/datasets/__init__.py +5 -0
  12. json2vec-0.4.7/src/json2vec/data/datasets/custom.py +322 -0
  13. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/data/datasets/streaming.py +20 -13
  14. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/data/processing.py +62 -11
  15. json2vec-0.4.7/src/json2vec/inference/__init__.py +64 -0
  16. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/inference/callback.py +17 -25
  17. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/inference/deployment.py +40 -13
  18. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/preprocessors/base.py +1 -1
  19. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/structs/enums.py +6 -0
  20. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/structs/experiment.py +15 -18
  21. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/structs/packages.py +0 -25
  22. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/structs/structure.py +3 -3
  23. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/structs/tree.py +55 -43
  24. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/base.py +4 -1
  25. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/extensions/category.py +2 -0
  26. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/extensions/dateparts.py +2 -0
  27. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/extensions/entity.py +2 -0
  28. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/extensions/number.py +2 -0
  29. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/extensions/set.py +14 -36
  30. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/extensions/text.py +2 -0
  31. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/extensions/vector.py +2 -0
  32. json2vec-0.4.7/src/json2vec.egg-info/PKG-INFO +395 -0
  33. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec.egg-info/SOURCES.txt +1 -0
  34. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec.egg-info/requires.txt +2 -0
  35. {json2vec-0.4.5 → json2vec-0.4.7}/tests/test_public_api.py +12 -0
  36. json2vec-0.4.5/PKG-INFO +0 -355
  37. json2vec-0.4.5/README.md +0 -321
  38. json2vec-0.4.5/src/json2vec/structs/__init__.py +0 -0
  39. json2vec-0.4.5/src/json2vec.egg-info/PKG-INFO +0 -355
  40. {json2vec-0.4.5 → json2vec-0.4.7}/LICENSE +0 -0
  41. {json2vec-0.4.5 → json2vec-0.4.7}/setup.cfg +0 -0
  42. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/__init__.py +0 -0
  43. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/attention.py +0 -0
  44. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/contracts.py +0 -0
  45. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/graph.py +0 -0
  46. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/mutations.py +0 -0
  47. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/node.py +0 -0
  48. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/pool.py +0 -0
  49. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/architecture/rotary.py +0 -0
  50. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/data/__init__.py +0 -0
  51. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/data/datasets/base.py +0 -0
  52. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/data/datasets/polars.py +0 -0
  53. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/data/iterables.py +0 -0
  54. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/distributed.py +0 -0
  55. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/logging/__init__.py +0 -0
  56. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/logging/config.py +0 -0
  57. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/logging/epoch.py +0 -0
  58. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/logging/throughput.py +0 -0
  59. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/preprocessors/__init__.py +0 -0
  60. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/preprocessors/extensions/__init__.py +0 -0
  61. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/preprocessors/spec.py +0 -0
  62. {json2vec-0.4.5/src/json2vec/inference → json2vec-0.4.7/src/json2vec/structs}/__init__.py +0 -0
  63. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/structs/selectors.py +0 -0
  64. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/__init__.py +0 -0
  65. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/extensions/__init__.py +0 -0
  66. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/shared/__init__.py +0 -0
  67. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/shared/counter.py +0 -0
  68. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/shared/vocabulary.py +0 -0
  69. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec/tensorfields/spec.py +0 -0
  70. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec.egg-info/dependency_links.txt +0 -0
  71. {json2vec-0.4.5 → json2vec-0.4.7}/src/json2vec.egg-info/top_level.txt +0 -0
  72. {json2vec-0.4.5 → json2vec-0.4.7}/tests/test_callbacks.py +0 -0
@@ -1,5 +1,5 @@
1
1
  JSON2Vec
2
- Copyright 2026 Grantham Taylor
2
+ Copyright 2024-2026 Grantham Taylor
3
3
 
4
4
  This project is licensed under the Apache License, Version 2.0.
5
5
  You may obtain a copy of the License at:
@@ -0,0 +1,395 @@
1
+ Metadata-Version: 2.4
2
+ Name: json2vec
3
+ Version: 0.4.7
4
+ Summary: Schema-first PyTorch models for hierarchical / nested / sequence data structures
5
+ License-Expression: Apache-2.0
6
+ Requires-Python: >=3.12
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ License-File: NOTICE
10
+ Requires-Dist: beartype>=0.21.0
11
+ Requires-Dist: pluggy>=1.6.0
12
+ Requires-Dist: rich>=14.0.0
13
+ Requires-Dist: pydantic>=2.11.7
14
+ Requires-Dist: jmespath>=1.0.1
15
+ Requires-Dist: loguru>=0.7.3
16
+ Requires-Dist: anytree>=2.13.0
17
+ Requires-Dist: ordered-set>=4.1.0
18
+ Requires-Dist: pyarrow>=21.0.0
19
+ Requires-Dist: polars>=1.35.2
20
+ Requires-Dist: numpy>=2.2.6
21
+ Requires-Dist: lightning>=2.6.4
22
+ Requires-Dist: tensordict>=0.10.0
23
+ Requires-Dist: torch>=2.7.1
24
+ Provides-Extra: serving
25
+ Requires-Dist: litserve>=0.2.13; extra == "serving"
26
+ Requires-Dist: pydantic-settings>=2.10.1; extra == "serving"
27
+ Provides-Extra: text
28
+ Requires-Dist: transformers>=4.55.0; extra == "text"
29
+ Provides-Extra: docs
30
+ Requires-Dist: litserve>=0.2.13; extra == "docs"
31
+ Requires-Dist: mkdocs-material>=9.6; extra == "docs"
32
+ Requires-Dist: mkdocs-jupyter>=0.26.3; extra == "docs"
33
+ Requires-Dist: mkdocstrings[python]>=0.27; extra == "docs"
34
+ Requires-Dist: pydantic-settings>=2.10.1; extra == "docs"
35
+ Dynamic: license-file
36
+
37
+ <h1 align="center"><code>json2vec</code></h1>
38
+
39
+ <p align="center">
40
+ <img alt="Python 3.12+" src="https://img.shields.io/badge/python-3.12%2B-3776AB?logo=python&amp;logoColor=white" />
41
+ <a href="LICENSE"><img alt="Apache-2.0 license" src="https://img.shields.io/badge/license-Apache--2.0-2E8B57" /></a>
42
+ <a href="https://json2vec.github.io/json2vec/"><img alt="Documentation" src="https://img.shields.io/badge/docs-MkDocs-526CFE?logo=materialformkdocs&amp;logoColor=white" /></a>
43
+ <!-- discord-invite:start -->
44
+ <a href="https://discord.gg/DVyZUkvTFA"><img alt="Discord channel invite" src="https://img.shields.io/badge/discord-join%20the%20channel-5865F2?logo=discord&amp;logoColor=white" /></a>
45
+ <!-- discord-invite:end -->
46
+ </p>
47
+
48
+ `json2vec` builds PyTorch/Lightning models directly from JSON-like schemas.
49
+ It is meant for predictive modeling on records that are not naturally flat:
50
+ customers with transactions, orders with line items, sessions with clickstream
51
+ events, devices recurring across histories, and mixed datatypes at every level.
52
+
53
+ Most ML pipelines flatten that shape first, then train on one fixed feature
54
+ row. `json2vec` takes the opposite path: describe the structured record, and
55
+ the schema becomes the model.
56
+
57
+ ## Core Idea
58
+
59
+ A `json2vec` schema is both a data contract and an architecture blueprint.
60
+
61
+ - Leaf fields such as `Number`, `Category`, `Set`, `Entity`, `Text`, and
62
+ `Vector` become datatype-specific tensorfields.
63
+ - `Array` nodes become local context encoders for repeated child objects.
64
+ - Targets, masks, pruning, and embeddings are configured on the same schema
65
+ tree.
66
+ - Prediction output is keyed by schema address, so decoded values and
67
+ embeddings remain attached to the part of the record that produced them.
68
+
69
+ That gives one model surface for supervised prediction, masked reconstruction,
70
+ unsupervised embedding workflows, schema mutation, field importance, batch
71
+ inference, and serving.
72
+
73
+ ## A Model From A Nested Record
74
+
75
+ ```python
76
+ import json2vec as j2v
77
+
78
+ model = j2v.Model.from_schema(
79
+ j2v.Category("customer_tier", max_vocab_size=16),
80
+ j2v.Array(
81
+ j2v.Category("sku", max_vocab_size=2048),
82
+ j2v.Number("quantity"),
83
+ j2v.Number("price"),
84
+ name="line_items",
85
+ max_length=32,
86
+ embed=True,
87
+ ),
88
+ j2v.Category("returned", target=True, max_vocab_size=2),
89
+ name="order",
90
+ d_model=64,
91
+ n_layers=2,
92
+ n_heads=4,
93
+ embed=True,
94
+ )
95
+ ```
96
+
97
+ This model reads records shaped like:
98
+
99
+ ```python
100
+ {
101
+ "customer_tier": "gold",
102
+ "line_items": [
103
+ {"sku": "A12", "quantity": 2, "price": 19.99},
104
+ {"sku": "B07", "quantity": 1, "price": 45.50},
105
+ ],
106
+ "returned": "false",
107
+ }
108
+ ```
109
+
110
+ The `line_items` branch has its own repeated context, `returned` is withheld
111
+ from input and decoded as a supervised target, and `embed=True` asks prediction
112
+ to emit embeddings at configured addresses.
113
+
114
+ ## Train With Lightning
115
+
116
+ `j2v.Model` is a LightningModule. `j2v.PolarsDataModule` and
117
+ `j2v.StreamingDataModule` are LightningDataModule implementations. The schema
118
+ defines the model tree, typed losses, prediction outputs, and embeddings;
119
+ Lightning runs `fit`, `validate`, `test`, and `predict`.
120
+
121
+ ```python
122
+ import lightning.pytorch as lit
123
+ import polars as pl
124
+ import torch
125
+
126
+ import json2vec as j2v
127
+
128
+ records = pl.read_ndjson("docs/data/iris.jsonl").head(36)
129
+
130
+ model = j2v.Model.from_schema(
131
+ j2v.Number("sepal_length"),
132
+ j2v.Number("petal_length"),
133
+ j2v.Category("species", target=True, max_vocab_size=4, topk=[2]),
134
+ d_model=16,
135
+ n_layers=1,
136
+ n_heads=4,
137
+ batch_size=8,
138
+ embed=True,
139
+ optimizer=lambda module: torch.optim.AdamW(module.parameters(), lr=1e-2),
140
+ )
141
+
142
+ datamodule = j2v.PolarsDataModule(
143
+ model=model,
144
+ train=records,
145
+ validate=records,
146
+ num_workers=0,
147
+ persistent_workers=False,
148
+ pin_memory=False,
149
+ observation_buffer_size=32,
150
+ sample_rate=1.0,
151
+ )
152
+
153
+ trainer = lit.Trainer(
154
+ accelerator="cpu",
155
+ max_epochs=1,
156
+ logger=False,
157
+ enable_progress_bar=False,
158
+ enable_model_summary=False,
159
+ enable_checkpointing=False,
160
+ limit_train_batches=1,
161
+ limit_val_batches=1,
162
+ )
163
+
164
+ trainer.fit(model=model, datamodule=datamodule)
165
+ ```
166
+
167
+ For larger jobs, the same model can run through normal Lightning callbacks,
168
+ checkpointing, precision settings, device placement, and distributed
169
+ strategies. See
170
+ [Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning/).
171
+
172
+ ## Predict And Embed
173
+
174
+ For small interactive batches, call `model.predict(...)` with raw dictionaries.
175
+
176
+ ```python
177
+ predictions = model.predict(records.to_dicts()[:3])
178
+
179
+ species = predictions[j2v.Address("record", "species")]
180
+ record = predictions[j2v.Address("record")]
181
+
182
+ print(species["content"]["value"])
183
+ print(species["content"]["probability"])
184
+ print(record["embedding"])
185
+ ```
186
+
187
+ For larger offline jobs, configure a `predict` split on a data module and attach
188
+ `j2v.Writer` to Lightning's prediction loop.
189
+
190
+ ```python
191
+ writer = j2v.Writer("predictions")
192
+
193
+ trainer = lit.Trainer(
194
+ accelerator="cpu",
195
+ callbacks=[writer],
196
+ logger=False,
197
+ )
198
+
199
+ predict_datamodule = j2v.PolarsDataModule(
200
+ model=model,
201
+ predict=records.drop("species"),
202
+ num_workers=0,
203
+ persistent_workers=False,
204
+ pin_memory=False,
205
+ )
206
+
207
+ trainer.predict(model=model, datamodule=predict_datamodule)
208
+ ```
209
+
210
+ `Writer` creates rank-partitioned Parquet files such as
211
+ `predictions/rank-0.parquet`. Use a postprocessor when downstream systems need
212
+ flat columns, renamed addresses, redacted payloads, or fewer fields. See
213
+ [Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference/)
214
+ and [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors/).
215
+
216
+ ## Learning Modes
217
+
218
+ `json2vec` does not maintain separate supervised and self-supervised code
219
+ paths. Supervised learning is the special case where a target field is hidden
220
+ from the input 100% of the time and decoded from the remaining context.
221
+
222
+ | Setting | What the model sees | What prediction can emit |
223
+ | --- | --- | --- |
224
+ | plain input | value is visible | no decoded output unless otherwise configured |
225
+ | `target=True` | value is hidden | decoded supervised output |
226
+ | `p_mask` | some observed values are hidden during training | decoded reconstruction |
227
+ | `p_prune` | whole leaf instances are hidden during training | decoded reconstruction |
228
+ | `embed=True` | does not hide the value | embedding at that address |
229
+
230
+ `target=True` is exact shorthand for `p_prune=1.0`. Use `p_mask` for stochastic
231
+ value-level reconstruction with rates lower than `1.0`. Use `embed=True` when
232
+ you want a representation returned from prediction.
233
+
234
+ ## Data Modules
235
+
236
+ Data modules load raw records, apply optional preprocessing, batch
237
+ observations, tensorize values from the model schema, apply training-time
238
+ masking and target pruning, and hand encoded batches to Lightning.
239
+
240
+ Choose the data module by where the records live:
241
+
242
+ | Use case | Module |
243
+ | --- | --- |
244
+ | Tutorials, tests, notebooks, in-memory Polars frames | `PolarsDataModule` |
245
+ | Many local files | `StreamingDataModule` |
246
+ | S3-backed datasets | `StreamingDataModule` |
247
+ | Distributed training or prediction over large inputs | `StreamingDataModule` |
248
+
249
+ `StreamingDataModule` supports local paths and `s3://...` roots with `ndjson`,
250
+ `parquet`, `feather`, `avro`, `csv`, `orc`, and `json` suffixes. Split
251
+ arguments are compiled regular expressions matched against discovered file
252
+ paths.
253
+
254
+ See [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules/)
255
+ for split configuration, sharding, sampling, buffers, and preprocessors.
256
+
257
+ ## What Makes This Different
258
+
259
+ - **Hierarchical context encoding:** child records interact locally before
260
+ their representation flows upward.
261
+ - **Extensible datatypes:** each field type owns validation, tensorization,
262
+ missing-state handling, masking, decoding, loss, metrics, and output writing.
263
+ - **Unified training roles:** `target=True`, `p_prune`, and `p_mask` all use the
264
+ same reconstruction path.
265
+ - **Embedding trees:** embeddings can come from the root, arrays, or selected
266
+ leaves.
267
+ - **Schema evolution:** fields can be added, removed, updated, reset, or
268
+ temporarily overridden after construction.
269
+ - **Production missingness semantics:** `null`, `padded`, `masked`, and
270
+ `valued` are distinct tensorfield states.
271
+ - **Training-serving parity:** queries, preprocessors, tensorization, model
272
+ execution, prediction writing, and postprocessors stay on the same configured
273
+ path.
274
+
275
+ ## Where It Fits
276
+
277
+ Use `json2vec` when relationships inside the record matter: account histories,
278
+ fraud or risk snapshots, order and fulfillment events, flight itineraries,
279
+ operations telemetry, user sessions, repeated measurements, or mixed-datatype
280
+ objects where flattening would discard useful structure.
281
+
282
+ Use a simpler tabular model when flattening loses no meaningful context. The
283
+ point is not to replace every table. The point is to model nested business data
284
+ without making a feature table the only representation the model can see.
285
+
286
+ ## What It Does Not Do
287
+
288
+ `json2vec` stops at the representation and typed prediction layer. It is not a
289
+ feature store, governance system, rule engine, authorization layer,
290
+ decision-capture system, or audit platform. Those systems can consume
291
+ `json2vec` embeddings and predictions, but their policies and operational
292
+ controls remain separate concerns.
293
+
294
+ The open-source layer is the reusable encoder and runtime infrastructure. It
295
+ does not require users to publish data, schemas, checkpoints, or model
296
+ parameters.
297
+
298
+ ## Install
299
+
300
+ For local development:
301
+
302
+ ```bash
303
+ uv sync
304
+ ```
305
+
306
+ The package requires Python `>=3.12`.
307
+
308
+ Optional extras:
309
+
310
+ ```bash
311
+ uv sync --extra text
312
+ uv sync --extra serving
313
+ uv sync --extra docs
314
+ ```
315
+
316
+ The `text` extra installs Hugging Face `transformers`. The `serving` extra
317
+ installs LitServe-backed deployment dependencies. The `docs` extra installs the
318
+ MkDocs toolchain.
319
+
320
+ ## Documentation Map
321
+
322
+ Start with:
323
+
324
+ - [Getting Started](https://json2vec.github.io/json2vec/getting-started/)
325
+ - [AI / Expert Quickstart](https://json2vec.github.io/json2vec/ai-quickstart/)
326
+ - [Model Tree](https://json2vec.github.io/json2vec/core-concepts/model-tree/)
327
+ - [Query Paths](https://json2vec.github.io/json2vec/core-concepts/querypaths/)
328
+ - [Built-In Data Types](https://json2vec.github.io/json2vec/core-concepts/data-types/)
329
+ - [Learning Modes & Embeddings](https://json2vec.github.io/json2vec/core-concepts/embeddings/)
330
+ - [Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning/)
331
+ - [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules/)
332
+ - [Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference/)
333
+ - [API Reference](https://json2vec.github.io/json2vec/reference/api/)
334
+
335
+ Tutorials and guides:
336
+
337
+ - [Hello World](https://json2vec.github.io/json2vec/tutorials/hello-world/)
338
+ - [Supervised Tabular Training](https://json2vec.github.io/json2vec/tutorials/supervised-tabular-training/)
339
+ - [Masked Pretraining](https://json2vec.github.io/json2vec/tutorials/pretraining/)
340
+ - [Nested Supervised Training](https://json2vec.github.io/json2vec/tutorials/nested-supervised-training/)
341
+ - [Serving](https://json2vec.github.io/json2vec/tutorials/serving/)
342
+ - [Preprocessors](https://json2vec.github.io/json2vec/guides/preprocessors/)
343
+ - [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors/)
344
+ - [Field Importance](https://json2vec.github.io/json2vec/guides/field-importance/)
345
+ - [Field Stacking](https://json2vec.github.io/json2vec/guides/field-stacking/)
346
+ - [Custom Data Types](https://json2vec.github.io/json2vec/data-types/tensorfields/)
347
+ - [Device Tenure Case Study](https://json2vec.github.io/json2vec/case-studies/device-tenure/)
348
+ - [Whitepaper](https://json2vec.github.io/json2vec/whitepaper.pdf)
349
+
350
+ Build the docs locally with:
351
+
352
+ ```bash
353
+ uv run --extra docs mkdocs build --strict
354
+ ```
355
+
356
+ ## Repository Layout
357
+
358
+ - `src/json2vec/architecture`: model assembly, attention, pooling, and routing
359
+ - `src/json2vec/data`: dataset fetch/read/process/batch/encode pipeline
360
+ - `src/json2vec/inference`: serving and prediction callbacks
361
+ - `src/json2vec/logging`: runtime logging callbacks
362
+ - `src/json2vec/preprocessors`: preprocessor registry
363
+ - `src/json2vec/structs`: pydantic config models, enums, and tree nodes
364
+ - `src/json2vec/tensorfields`: tensorfield plugin system and built-in fields
365
+ - `tests/`: package test suite
366
+ - `docs/`: tutorials, guides, diagrams, and whitepaper source
367
+
368
+ ## Development
369
+
370
+ Run tests:
371
+
372
+ ```bash
373
+ uv run pytest
374
+ ```
375
+
376
+ Run type and lint checks:
377
+
378
+ ```bash
379
+ uv run ty check src/json2vec --output-format concise
380
+ uv run ruff check
381
+ ```
382
+
383
+ ## Community
384
+
385
+ Join the [`json2vec` Discord](https://discord.gg/DVyZUkvTFA) for questions,
386
+ design discussion, and release notes.
387
+
388
+ ## License
389
+
390
+ Licensed under the Apache License, Version 2.0. See `LICENSE` and `NOTICE`.
391
+
392
+ ## References
393
+
394
+ - `BIBLIOGRAPHY.md`
395
+ - `CITATION.bib`