PyPI - json2vec - Versions diffs - 0.4.9__tar.gz → 0.4.10__tar.gz - Mend

json2vec 0.4.9__tar.gz → 0.4.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

{json2vec-0.4.9/src/json2vec.egg-info → json2vec-0.4.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: json2vec
-Version: 0.4.9
+Version: 0.4.10
 Summary: Schema-first PyTorch models for hierarchical / nested / sequence data structures
 License-Expression: Apache-2.0
 Requires-Python: >=3.12
@@ -28,13 +28,7 @@ Requires-Dist: uvicorn>=0.38.0; extra == "serving"
 Provides-Extra: text
 Requires-Dist: transformers>=4.55.0; extra == "text"
 Provides-Extra: docs
-Requires-Dist: fastapi>=0.124.0; extra == "docs"
-Requires-Dist: mkdocs-material>=9.6; extra == "docs"
-Requires-Dist: mkdocs-jupyter>=0.26.3; extra == "docs"
-Requires-Dist: mkdocstrings[python]>=0.27; extra == "docs"
-Requires-Dist: orjson>=3.10.0; extra == "docs"
-Requires-Dist: pydantic-settings>=2.10.1; extra == "docs"
-Requires-Dist: uvicorn>=0.38.0; extra == "docs"
+Requires-Dist: marimo>=0.23.8; extra == "docs"
 Dynamic: license-file
 <h1 align="center"><code>json2vec</code></h1>
@@ -42,7 +36,7 @@ Dynamic: license-file
 <p align="center">
   <img alt="Python 3.12+" src="https://img.shields.io/badge/python-3.12%2B-3776AB?logo=python&amp;logoColor=white" />
   <a href="LICENSE"><img alt="Apache-2.0 license" src="https://img.shields.io/badge/license-Apache--2.0-2E8B57" /></a>
-  <a href="https://json2vec.github.io/json2vec/"><img alt="Documentation" src="https://img.shields.io/badge/docs-MkDocs-526CFE?logo=materialformkdocs&amp;logoColor=white" /></a>
+  <a href="https://json2vec.github.io/json2vec/"><img alt="Documentation" src="https://img.shields.io/badge/docs-Quarto-39729E?logo=quarto&amp;logoColor=white" /></a>
   <!-- discord-invite:start -->
   <a href="https://discord.gg/DVyZUkvTFA"><img alt="Discord channel invite" src="https://img.shields.io/badge/discord-join%20the%20channel-5865F2?logo=discord&amp;logoColor=white" /></a>
   <!-- discord-invite:end -->
@@ -63,7 +57,8 @@ A `json2vec` schema is both a data contract and an architecture blueprint.
 - Leaf fields such as `Number`, `Category`, `Set`, `Entity`, `Text`, and
   `Vector` become datatype-specific tensorfields.
-- `Array` nodes become local context encoders for repeated child objects.
+- `Branch` nodes define shared contexts for child fields, with optional local
+  attention and pooling before the representation flows upward.
 - Targets, masks, pruning, and embeddings are configured on the same schema
   tree.
 - Prediction output is keyed by schema address, so decoded values and
@@ -76,24 +71,23 @@ inference, and serving.
 ## A Model From A Nested Record
 ```python
-import json2vec as j2v
-model = j2v.Model.from_schema(
-    j2v.Category("customer_tier", max_vocab_size=16),
-    j2v.Array(
-        j2v.Category("sku", max_vocab_size=2048),
-        j2v.Number("quantity"),
-        j2v.Number("price"),
-        name="line_items",
-        max_length=32,
-        embed=True,
-    ),
-    j2v.Category("returned", target=True, max_vocab_size=2),
+import json2vec as jv
+model = jv.Model.from_tree(
     name="order",
     d_model=64,
     n_layers=2,
     n_heads=4,
     embed=True,
+    customer_tier=jv.Category(size=16),
+    line_items=jv.Branch(
+        length=32,
+        embed=True,
+        sku=jv.Category(size=2048),
+        quantity=jv.Number,
+        price=jv.Number,
+    ),
+    returned=jv.Category(target=True, size=2),
 )
 ```
@@ -116,8 +110,8 @@ to emit embeddings at configured addresses.
 ## Train With Lightning
-`j2v.Model` is a LightningModule. `j2v.PolarsDataModule` and
-`j2v.StreamingDataModule` are LightningDataModule implementations. The schema
+`jv.Model` is a LightningModule. `jv.PolarsDataModule` and
+`jv.StreamingDataModule` are LightningDataModule implementations. The schema
 defines the model tree, typed losses, prediction outputs, and embeddings;
 Lightning runs `fit`, `validate`, `test`, and `predict`.
@@ -126,23 +120,23 @@ import lightning.pytorch as lit
 import polars as pl
 import torch
-import json2vec as j2v
+import json2vec as jv
 records = pl.read_ndjson("docs/data/iris.jsonl").head(36)
-model = j2v.Model.from_schema(
-    j2v.Number("sepal_length"),
-    j2v.Number("petal_length"),
-    j2v.Category("species", target=True, max_vocab_size=4, topk=[2]),
+model = jv.Model.from_tree(
     d_model=16,
     n_layers=1,
     n_heads=4,
     batch_size=8,
     embed=True,
     optimizer=lambda module: torch.optim.AdamW(module.parameters(), lr=1e-2),
+    sepal_length=jv.Number,
+    petal_length=jv.Number,
+    species=jv.Category(target=True, size=4, topk=[2]),
 )
-datamodule = j2v.PolarsDataModule(
+datamodule = jv.PolarsDataModule(
     model=model,
     train=records,
     validate=records,
@@ -170,7 +164,7 @@ trainer.fit(model=model, datamodule=datamodule)
 For larger jobs, the same model can run through normal Lightning callbacks,
 checkpointing, precision settings, device placement, and distributed
 strategies. See
-[Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning/).
+[Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning.html).
 ## Predict And Embed
@@ -179,8 +173,8 @@ For small interactive batches, call `model.predict(...)` with raw dictionaries.
 ```python
 predictions = model.predict(records.to_dicts()[:3])
-species = predictions[j2v.Address("record", "species")]
-record = predictions[j2v.Address("record")]
+species = predictions[jv.Address("record", "species")]
+record = predictions[jv.Address("record")]
 print(species["content"]["value"])
 print(species["content"]["probability"])
@@ -188,10 +182,10 @@ print(record["embedding"])
 ```
 For larger offline jobs, configure a `predict` split on a data module and attach
-`j2v.Writer` to Lightning's prediction loop.
+`jv.Writer` to Lightning's prediction loop.
 ```python
-writer = j2v.Writer("predictions")
+writer = jv.Writer("predictions")
 trainer = lit.Trainer(
     accelerator="cpu",
@@ -199,7 +193,7 @@ trainer = lit.Trainer(
     logger=False,
 )
-predict_datamodule = j2v.PolarsDataModule(
+predict_datamodule = jv.PolarsDataModule(
     model=model,
     predict=records.drop("species"),
     num_workers=0,
@@ -213,8 +207,8 @@ trainer.predict(model=model, datamodule=predict_datamodule)
 `Writer` creates rank-partitioned Parquet files such as
 `predictions/rank-0.parquet`. Use a postprocessor when downstream systems need
 flat columns, renamed addresses, redacted payloads, or fewer fields. See
-[Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference/)
-and [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors/).
+[Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference.html)
+and [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors.html).
 ## Learning Modes
@@ -254,7 +248,7 @@ Choose the data module by where the records live:
 arguments are compiled regular expressions matched against discovered file
 paths.
-See [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules/)
+See [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules.html)
 for split configuration, sharding, sampling, buffers, and preprocessors.
 ## What Makes This Different
@@ -265,7 +259,7 @@ for split configuration, sharding, sampling, buffers, and preprocessors.
   missing-state handling, masking, decoding, loss, metrics, and output writing.
 - **Unified training roles:** `target=True`, `p_prune`, and `p_mask` all use the
   same reconstruction path.
-- **Embedding trees:** embeddings can come from the root, arrays, or selected
+- **Embedding trees:** embeddings can come from the root, branches, or selected
   leaves.
 - **Schema evolution:** fields can be added, removed, updated, reset, or
   temporarily overridden after construction.
@@ -318,55 +312,53 @@ uv sync --extra docs
 The `text` extra installs Hugging Face `transformers`. The `serving` extra
 installs FastAPI-backed deployment dependencies. The `docs` extra installs the
-MkDocs toolchain.
+Python packages used by the Quarto docs.
 ## Documentation Map
 Start with:
-- [Getting Started](https://json2vec.github.io/json2vec/getting-started/)
-- [AI / Expert Quickstart](https://json2vec.github.io/json2vec/ai-quickstart/)
-- [Model Tree](https://json2vec.github.io/json2vec/core-concepts/model-tree/)
-- [Query Paths](https://json2vec.github.io/json2vec/core-concepts/querypaths/)
-- [Built-In Data Types](https://json2vec.github.io/json2vec/core-concepts/data-types/)
-- [Learning Modes & Embeddings](https://json2vec.github.io/json2vec/core-concepts/embeddings/)
-- [Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning/)
-- [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules/)
-- [Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference/)
-- [API Reference](https://json2vec.github.io/json2vec/reference/api/)
+- [Getting Started](https://json2vec.github.io/json2vec/getting-started.html)
+- [AI / Expert Quickstart](https://json2vec.github.io/json2vec/ai-quickstart.html)
+- [Model Tree](https://json2vec.github.io/json2vec/core-concepts/model-tree.html)
+- [Query Paths](https://json2vec.github.io/json2vec/core-concepts/querypaths.html)
+- [Built-In Data Types](https://json2vec.github.io/json2vec/core-concepts/data-types.html)
+- [Learning Modes & Embeddings](https://json2vec.github.io/json2vec/core-concepts/embeddings.html)
+- [Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning.html)
+- [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules.html)
+- [Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference.html)
 Tutorials and guides:
-- [Hello World](https://json2vec.github.io/json2vec/tutorials/hello-world/)
-- [Supervised Tabular Training](https://json2vec.github.io/json2vec/tutorials/supervised-tabular-training/)
-- [Masked Pretraining](https://json2vec.github.io/json2vec/tutorials/pretraining/)
-- [Nested Supervised Training](https://json2vec.github.io/json2vec/tutorials/nested-supervised-training/)
-- [Serving](https://json2vec.github.io/json2vec/tutorials/serving/)
-- [Preprocessors](https://json2vec.github.io/json2vec/guides/preprocessors/)
-- [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors/)
-- [Field Importance](https://json2vec.github.io/json2vec/guides/field-importance/)
-- [Field Stacking](https://json2vec.github.io/json2vec/guides/field-stacking/)
-- [Custom Data Types](https://json2vec.github.io/json2vec/data-types/tensorfields/)
-- [Device Tenure Case Study](https://json2vec.github.io/json2vec/case-studies/device-tenure/)
-- [Whitepaper](https://json2vec.github.io/json2vec/whitepaper.pdf)
+- [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors.html)
+- [Field Importance](https://json2vec.github.io/json2vec/guides/field-importance.html)
+- [Field Stacking](https://json2vec.github.io/json2vec/guides/field-stacking.html)
+- [Branch](https://json2vec.github.io/json2vec/data-types/branch.html)
+- [Number](https://json2vec.github.io/json2vec/data-types/number.html)
+- [Category](https://json2vec.github.io/json2vec/data-types/category.html)
+- [Set](https://json2vec.github.io/json2vec/data-types/set.html)
+- [Entity](https://json2vec.github.io/json2vec/data-types/entity.html)
+- [DateParts](https://json2vec.github.io/json2vec/data-types/dateparts.html)
+- [Vector](https://json2vec.github.io/json2vec/data-types/vector.html)
+- [Text](https://json2vec.github.io/json2vec/data-types/text.html)
+- [Device Tenure Case Study](https://json2vec.github.io/json2vec/case-studies/device-tenure.html)
 Build the docs locally with:
 ```bash
-uv run --extra docs mkdocs build --strict
+make render
 ```
 ## Repository Layout
 - `src/json2vec/architecture`: model assembly, attention, pooling, and routing
-- `src/json2vec/data`: dataset fetch/read/process/batch/encode pipeline
+- `src/json2vec/data`: dataset fetch/read/process/batch/encode pipeline and preprocessor exports
 - `src/json2vec/inference`: serving and prediction callbacks
 - `src/json2vec/logging`: runtime logging callbacks
-- `src/json2vec/preprocessors`: preprocessor registry
 - `src/json2vec/structs`: pydantic config models, enums, and tree nodes
 - `src/json2vec/tensorfields`: tensorfield plugin system and built-in fields
 - `tests/`: package test suite
-- `docs/`: tutorials, guides, diagrams, and whitepaper source
+- `docs/`: Quarto project, pages, guides, stylesheets, and sample data
 ## Development

{json2vec-0.4.9 → json2vec-0.4.10}/README.md RENAMED Viewed

@@ -3,7 +3,7 @@
 <p align="center">
   <img alt="Python 3.12+" src="https://img.shields.io/badge/python-3.12%2B-3776AB?logo=python&amp;logoColor=white" />
   <a href="LICENSE"><img alt="Apache-2.0 license" src="https://img.shields.io/badge/license-Apache--2.0-2E8B57" /></a>
-  <a href="https://json2vec.github.io/json2vec/"><img alt="Documentation" src="https://img.shields.io/badge/docs-MkDocs-526CFE?logo=materialformkdocs&amp;logoColor=white" /></a>
+  <a href="https://json2vec.github.io/json2vec/"><img alt="Documentation" src="https://img.shields.io/badge/docs-Quarto-39729E?logo=quarto&amp;logoColor=white" /></a>
   <!-- discord-invite:start -->
   <a href="https://discord.gg/DVyZUkvTFA"><img alt="Discord channel invite" src="https://img.shields.io/badge/discord-join%20the%20channel-5865F2?logo=discord&amp;logoColor=white" /></a>
   <!-- discord-invite:end -->
@@ -24,7 +24,8 @@ A `json2vec` schema is both a data contract and an architecture blueprint.
 - Leaf fields such as `Number`, `Category`, `Set`, `Entity`, `Text`, and
   `Vector` become datatype-specific tensorfields.
-- `Array` nodes become local context encoders for repeated child objects.
+- `Branch` nodes define shared contexts for child fields, with optional local
+  attention and pooling before the representation flows upward.
 - Targets, masks, pruning, and embeddings are configured on the same schema
   tree.
 - Prediction output is keyed by schema address, so decoded values and
@@ -37,24 +38,23 @@ inference, and serving.
 ## A Model From A Nested Record
 ```python
-import json2vec as j2v
-model = j2v.Model.from_schema(
-    j2v.Category("customer_tier", max_vocab_size=16),
-    j2v.Array(
-        j2v.Category("sku", max_vocab_size=2048),
-        j2v.Number("quantity"),
-        j2v.Number("price"),
-        name="line_items",
-        max_length=32,
-        embed=True,
-    ),
-    j2v.Category("returned", target=True, max_vocab_size=2),
+import json2vec as jv
+model = jv.Model.from_tree(
     name="order",
     d_model=64,
     n_layers=2,
     n_heads=4,
     embed=True,
+    customer_tier=jv.Category(size=16),
+    line_items=jv.Branch(
+        length=32,
+        embed=True,
+        sku=jv.Category(size=2048),
+        quantity=jv.Number,
+        price=jv.Number,
+    ),
+    returned=jv.Category(target=True, size=2),
 )
 ```
@@ -77,8 +77,8 @@ to emit embeddings at configured addresses.
 ## Train With Lightning
-`j2v.Model` is a LightningModule. `j2v.PolarsDataModule` and
-`j2v.StreamingDataModule` are LightningDataModule implementations. The schema
+`jv.Model` is a LightningModule. `jv.PolarsDataModule` and
+`jv.StreamingDataModule` are LightningDataModule implementations. The schema
 defines the model tree, typed losses, prediction outputs, and embeddings;
 Lightning runs `fit`, `validate`, `test`, and `predict`.
@@ -87,23 +87,23 @@ import lightning.pytorch as lit
 import polars as pl
 import torch
-import json2vec as j2v
+import json2vec as jv
 records = pl.read_ndjson("docs/data/iris.jsonl").head(36)
-model = j2v.Model.from_schema(
-    j2v.Number("sepal_length"),
-    j2v.Number("petal_length"),
-    j2v.Category("species", target=True, max_vocab_size=4, topk=[2]),
+model = jv.Model.from_tree(
     d_model=16,
     n_layers=1,
     n_heads=4,
     batch_size=8,
     embed=True,
     optimizer=lambda module: torch.optim.AdamW(module.parameters(), lr=1e-2),
+    sepal_length=jv.Number,
+    petal_length=jv.Number,
+    species=jv.Category(target=True, size=4, topk=[2]),
 )
-datamodule = j2v.PolarsDataModule(
+datamodule = jv.PolarsDataModule(
     model=model,
     train=records,
     validate=records,
@@ -131,7 +131,7 @@ trainer.fit(model=model, datamodule=datamodule)
 For larger jobs, the same model can run through normal Lightning callbacks,
 checkpointing, precision settings, device placement, and distributed
 strategies. See
-[Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning/).
+[Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning.html).
 ## Predict And Embed
@@ -140,8 +140,8 @@ For small interactive batches, call `model.predict(...)` with raw dictionaries.
 ```python
 predictions = model.predict(records.to_dicts()[:3])
-species = predictions[j2v.Address("record", "species")]
-record = predictions[j2v.Address("record")]
+species = predictions[jv.Address("record", "species")]
+record = predictions[jv.Address("record")]
 print(species["content"]["value"])
 print(species["content"]["probability"])
@@ -149,10 +149,10 @@ print(record["embedding"])
 ```
 For larger offline jobs, configure a `predict` split on a data module and attach
-`j2v.Writer` to Lightning's prediction loop.
+`jv.Writer` to Lightning's prediction loop.
 ```python
-writer = j2v.Writer("predictions")
+writer = jv.Writer("predictions")
 trainer = lit.Trainer(
     accelerator="cpu",
@@ -160,7 +160,7 @@ trainer = lit.Trainer(
     logger=False,
 )
-predict_datamodule = j2v.PolarsDataModule(
+predict_datamodule = jv.PolarsDataModule(
     model=model,
     predict=records.drop("species"),
     num_workers=0,
@@ -174,8 +174,8 @@ trainer.predict(model=model, datamodule=predict_datamodule)
 `Writer` creates rank-partitioned Parquet files such as
 `predictions/rank-0.parquet`. Use a postprocessor when downstream systems need
 flat columns, renamed addresses, redacted payloads, or fewer fields. See
-[Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference/)
-and [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors/).
+[Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference.html)
+and [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors.html).
 ## Learning Modes
@@ -215,7 +215,7 @@ Choose the data module by where the records live:
 arguments are compiled regular expressions matched against discovered file
 paths.
-See [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules/)
+See [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules.html)
 for split configuration, sharding, sampling, buffers, and preprocessors.
 ## What Makes This Different
@@ -226,7 +226,7 @@ for split configuration, sharding, sampling, buffers, and preprocessors.
   missing-state handling, masking, decoding, loss, metrics, and output writing.
 - **Unified training roles:** `target=True`, `p_prune`, and `p_mask` all use the
   same reconstruction path.
-- **Embedding trees:** embeddings can come from the root, arrays, or selected
+- **Embedding trees:** embeddings can come from the root, branches, or selected
   leaves.
 - **Schema evolution:** fields can be added, removed, updated, reset, or
   temporarily overridden after construction.
@@ -279,55 +279,53 @@ uv sync --extra docs
 The `text` extra installs Hugging Face `transformers`. The `serving` extra
 installs FastAPI-backed deployment dependencies. The `docs` extra installs the
-MkDocs toolchain.
+Python packages used by the Quarto docs.
 ## Documentation Map
 Start with:
-- [Getting Started](https://json2vec.github.io/json2vec/getting-started/)
-- [AI / Expert Quickstart](https://json2vec.github.io/json2vec/ai-quickstart/)
-- [Model Tree](https://json2vec.github.io/json2vec/core-concepts/model-tree/)
-- [Query Paths](https://json2vec.github.io/json2vec/core-concepts/querypaths/)
-- [Built-In Data Types](https://json2vec.github.io/json2vec/core-concepts/data-types/)
-- [Learning Modes & Embeddings](https://json2vec.github.io/json2vec/core-concepts/embeddings/)
-- [Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning/)
-- [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules/)
-- [Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference/)
-- [API Reference](https://json2vec.github.io/json2vec/reference/api/)
+- [Getting Started](https://json2vec.github.io/json2vec/getting-started.html)
+- [AI / Expert Quickstart](https://json2vec.github.io/json2vec/ai-quickstart.html)
+- [Model Tree](https://json2vec.github.io/json2vec/core-concepts/model-tree.html)
+- [Query Paths](https://json2vec.github.io/json2vec/core-concepts/querypaths.html)
+- [Built-In Data Types](https://json2vec.github.io/json2vec/core-concepts/data-types.html)
+- [Learning Modes & Embeddings](https://json2vec.github.io/json2vec/core-concepts/embeddings.html)
+- [Training With Lightning](https://json2vec.github.io/json2vec/guides/lightning.html)
+- [Data Modules](https://json2vec.github.io/json2vec/guides/data-modules.html)
+- [Batch Inference](https://json2vec.github.io/json2vec/guides/batch-inference.html)
 Tutorials and guides:
-- [Hello World](https://json2vec.github.io/json2vec/tutorials/hello-world/)
-- [Supervised Tabular Training](https://json2vec.github.io/json2vec/tutorials/supervised-tabular-training/)
-- [Masked Pretraining](https://json2vec.github.io/json2vec/tutorials/pretraining/)
-- [Nested Supervised Training](https://json2vec.github.io/json2vec/tutorials/nested-supervised-training/)
-- [Serving](https://json2vec.github.io/json2vec/tutorials/serving/)
-- [Preprocessors](https://json2vec.github.io/json2vec/guides/preprocessors/)
-- [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors/)
-- [Field Importance](https://json2vec.github.io/json2vec/guides/field-importance/)
-- [Field Stacking](https://json2vec.github.io/json2vec/guides/field-stacking/)
-- [Custom Data Types](https://json2vec.github.io/json2vec/data-types/tensorfields/)
-- [Device Tenure Case Study](https://json2vec.github.io/json2vec/case-studies/device-tenure/)
-- [Whitepaper](https://json2vec.github.io/json2vec/whitepaper.pdf)
+- [Postprocessors](https://json2vec.github.io/json2vec/guides/postprocessors.html)
+- [Field Importance](https://json2vec.github.io/json2vec/guides/field-importance.html)
+- [Field Stacking](https://json2vec.github.io/json2vec/guides/field-stacking.html)
+- [Branch](https://json2vec.github.io/json2vec/data-types/branch.html)
+- [Number](https://json2vec.github.io/json2vec/data-types/number.html)
+- [Category](https://json2vec.github.io/json2vec/data-types/category.html)
+- [Set](https://json2vec.github.io/json2vec/data-types/set.html)
+- [Entity](https://json2vec.github.io/json2vec/data-types/entity.html)
+- [DateParts](https://json2vec.github.io/json2vec/data-types/dateparts.html)
+- [Vector](https://json2vec.github.io/json2vec/data-types/vector.html)
+- [Text](https://json2vec.github.io/json2vec/data-types/text.html)
+- [Device Tenure Case Study](https://json2vec.github.io/json2vec/case-studies/device-tenure.html)
 Build the docs locally with:
 ```bash
-uv run --extra docs mkdocs build --strict
+make render
 ```
 ## Repository Layout
 - `src/json2vec/architecture`: model assembly, attention, pooling, and routing
-- `src/json2vec/data`: dataset fetch/read/process/batch/encode pipeline
+- `src/json2vec/data`: dataset fetch/read/process/batch/encode pipeline and preprocessor exports
 - `src/json2vec/inference`: serving and prediction callbacks
 - `src/json2vec/logging`: runtime logging callbacks
-- `src/json2vec/preprocessors`: preprocessor registry
 - `src/json2vec/structs`: pydantic config models, enums, and tree nodes
 - `src/json2vec/tensorfields`: tensorfield plugin system and built-in fields
 - `tests/`: package test suite
-- `docs/`: tutorials, guides, diagrams, and whitepaper source
+- `docs/`: Quarto project, pages, guides, stylesheets, and sample data
 ## Development

{json2vec-0.4.9 → json2vec-0.4.10}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "json2vec"
-version = "0.4.9"
+version = "0.4.10"
 description = "Schema-first PyTorch models for hierarchical / nested / sequence data structures"
 readme = "README.md"
 license = "Apache-2.0"
@@ -32,13 +32,7 @@ text = [
     "transformers>=4.55.0",
 ]
 docs = [
-    "fastapi>=0.124.0",
-    "mkdocs-material>=9.6",
-    "mkdocs-jupyter>=0.26.3",
-    "mkdocstrings[python]>=0.27",
-    "orjson>=3.10.0",
-    "pydantic-settings>=2.10.1",
-    "uvicorn>=0.38.0",
+    "marimo>=0.23.8",
 ]
 [dependency-groups]

{json2vec-0.4.9 → json2vec-0.4.10}/src/json2vec/__init__.py RENAMED Viewed

@@ -1,7 +1,7 @@
 """Public `json2vec` SDK surface.
 The top-level package exports the constructors and helpers used by most
-applications: `Model.from_schema(...)` for model construction, tensorfield
+applications: `Model.from_tree(...)` for model construction, tensorfield
 request constructors such as `Category` and `Number`, data modules, schema
 mutation predicates, and the `@preprocess` decorator.
 """
@@ -17,9 +17,22 @@ from json2vec.architecture.root import (
     SchedulerConfig,
 )
 from json2vec.data.datasets import CustomDataModule, PolarsDataModule, StreamingDataModule
-from json2vec.data.processing import MASK_LITERAL, MaskLiteral
-from json2vec.inference.callback import Postprocessor, Writer
-from json2vec.preprocessors import PREPROCESSORS, Preprocessor, PreprocessorMode, preprocess
+from json2vec.data.nested import MASK_LITERAL, MaskLiteral
+from json2vec.data.processors import (
+    Metadata,
+    Observation,
+    Postprocessor,
+    PostprocessorProvider,
+    PostprocessorResult,
+    Predictions,
+    Preprocessor,
+    PreprocessorProvider,
+    RawBatch,
+    RawObservation,
+    postprocess,
+    preprocess,
+)
+from json2vec.inference.callback import Writer
 from json2vec.structs.enums import (
     AttentionMode,
     Component,
@@ -32,14 +45,14 @@ from json2vec.structs.enums import (
     Tokens,
 )
 from json2vec.structs.experiment import (
-    Hyperparameters,
     NodeAttribute,
     NodePredicate,
+    Schema,
     SchemaField,
     predicate,
     where,
 )
-from json2vec.structs.structure import Array, Mask
+from json2vec.structs.structure import Branch, Mask
 from json2vec.structs.tree import Address, Leaf
 from json2vec.tensorfields import TENSORFIELDS, DecoderBase, EmbedderBase, Plugin, RequestBase, TensorFieldBase
 from json2vec.tensorfields.extensions.category import Request as Category
@@ -96,7 +109,7 @@ def __dir__() -> list[str]:
 __all__ = [
     "Address",
     "Accelerator",
-    "Array",
+    "Branch",
     "AttentionMode",
     "Category",
     "Component",
@@ -107,7 +120,7 @@ __all__ = [
     "EmbedderBase",
     "Entity",
     "helpers",
-    "Hyperparameters",
+    "Schema",
     "Input",
     "JSONBackend",
     "Leaf",
@@ -115,20 +128,26 @@ __all__ = [
     "MASK_LITERAL",
     "Mask",
     "MaskLiteral",
+    "Metadata",
     "Model",
     "ModelSource",
     "MutationLockCallback",
     "NodeAttribute",
     "NodePredicate",
     "Number",
+    "Observation",
     "OptimizerConfig",
     "Overflow",
-    "PREPROCESSORS",
     "Plugin",
     "PolarsDataModule",
     "Postprocessor",
+    "PostprocessorProvider",
+    "PostprocessorResult",
+    "Predictions",
     "Preprocessor",
-    "PreprocessorMode",
+    "PreprocessorProvider",
+    "RawBatch",
+    "RawObservation",
     "RequestBase",
     "RollbackCheckpoint",
     "RuntimePlacementCallback",
@@ -149,6 +168,7 @@ __all__ = [
     "VocabularySyncCallback",
     "Writer",
     "predicate",
+    "postprocess",
     "preprocess",
     "where",
 ]

json2vec 0.4.9__tar.gz → 0.4.10__tar.gz

json2vec 0.4.9__tar.gz → 0.4.10__tar.gz