json2vec 0.4.5__tar.gz → 0.4.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {json2vec-0.4.5/src/json2vec.egg-info → json2vec-0.4.6}/PKG-INFO +24 -21
  2. {json2vec-0.4.5 → json2vec-0.4.6}/README.md +21 -20
  3. {json2vec-0.4.5 → json2vec-0.4.6}/pyproject.toml +3 -1
  4. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/__init__.py +60 -1
  5. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/checkpoint.py +1 -1
  6. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/encoder.py +1 -1
  7. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/plot.py +2 -3
  8. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/root.py +17 -61
  9. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/runtime.py +52 -44
  10. json2vec-0.4.6/src/json2vec/inference/__init__.py +64 -0
  11. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/inference/callback.py +17 -25
  12. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/inference/deployment.py +6 -11
  13. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/preprocessors/base.py +1 -1
  14. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/structs/experiment.py +6 -14
  15. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/structs/packages.py +0 -25
  16. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/structs/structure.py +1 -2
  17. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/structs/tree.py +43 -43
  18. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/base.py +4 -1
  19. {json2vec-0.4.5 → json2vec-0.4.6/src/json2vec.egg-info}/PKG-INFO +24 -21
  20. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec.egg-info/requires.txt +2 -0
  21. {json2vec-0.4.5 → json2vec-0.4.6}/tests/test_public_api.py +10 -0
  22. json2vec-0.4.5/src/json2vec/structs/__init__.py +0 -0
  23. {json2vec-0.4.5 → json2vec-0.4.6}/LICENSE +0 -0
  24. {json2vec-0.4.5 → json2vec-0.4.6}/NOTICE +0 -0
  25. {json2vec-0.4.5 → json2vec-0.4.6}/setup.cfg +0 -0
  26. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/__init__.py +0 -0
  27. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/attention.py +0 -0
  28. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/contracts.py +0 -0
  29. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/graph.py +0 -0
  30. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/mutations.py +0 -0
  31. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/node.py +0 -0
  32. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/pool.py +0 -0
  33. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/architecture/rotary.py +0 -0
  34. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/data/__init__.py +0 -0
  35. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/data/datasets/__init__.py +0 -0
  36. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/data/datasets/base.py +0 -0
  37. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/data/datasets/polars.py +0 -0
  38. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/data/datasets/streaming.py +0 -0
  39. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/data/iterables.py +0 -0
  40. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/data/processing.py +0 -0
  41. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/distributed.py +0 -0
  42. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/logging/__init__.py +0 -0
  43. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/logging/config.py +0 -0
  44. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/logging/epoch.py +0 -0
  45. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/logging/throughput.py +0 -0
  46. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/preprocessors/__init__.py +0 -0
  47. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/preprocessors/extensions/__init__.py +0 -0
  48. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/preprocessors/spec.py +0 -0
  49. {json2vec-0.4.5/src/json2vec/inference → json2vec-0.4.6/src/json2vec/structs}/__init__.py +0 -0
  50. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/structs/enums.py +0 -0
  51. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/structs/selectors.py +0 -0
  52. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/__init__.py +0 -0
  53. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/extensions/__init__.py +0 -0
  54. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/extensions/category.py +0 -0
  55. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/extensions/dateparts.py +0 -0
  56. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/extensions/entity.py +0 -0
  57. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/extensions/number.py +0 -0
  58. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/extensions/set.py +0 -0
  59. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/extensions/text.py +0 -0
  60. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/extensions/vector.py +0 -0
  61. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/shared/__init__.py +0 -0
  62. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/shared/counter.py +0 -0
  63. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/shared/vocabulary.py +0 -0
  64. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec/tensorfields/spec.py +0 -0
  65. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec.egg-info/SOURCES.txt +0 -0
  66. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec.egg-info/dependency_links.txt +0 -0
  67. {json2vec-0.4.5 → json2vec-0.4.6}/src/json2vec.egg-info/top_level.txt +0 -0
  68. {json2vec-0.4.5 → json2vec-0.4.6}/tests/test_callbacks.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: json2vec
3
- Version: 0.4.5
3
+ Version: 0.4.6
4
4
  Summary: {...} -> [*]
5
5
  License-Expression: Apache-2.0
6
6
  Requires-Python: >=3.12
@@ -27,16 +27,18 @@ Requires-Dist: pydantic-settings>=2.10.1; extra == "serving"
27
27
  Provides-Extra: text
28
28
  Requires-Dist: transformers>=4.55.0; extra == "text"
29
29
  Provides-Extra: docs
30
+ Requires-Dist: litserve>=0.2.13; extra == "docs"
30
31
  Requires-Dist: mkdocs-material>=9.6; extra == "docs"
31
32
  Requires-Dist: mkdocs-jupyter>=0.26.3; extra == "docs"
32
33
  Requires-Dist: mkdocstrings[python]>=0.27; extra == "docs"
34
+ Requires-Dist: pydantic-settings>=2.10.1; extra == "docs"
33
35
  Dynamic: license-file
34
36
 
35
37
  <p align="center">
36
- <img src="https://json2vec.github.io/json2vec/diagrams/json2vec.png" alt="JSON2Vec logo" width="180">
38
+ <img src="https://json2vec.github.io/json2vec/diagrams/json2vec.png" alt="json2vec logo" width="180">
37
39
  </p>
38
40
 
39
- <h1 align="center">JSON2Vec</h1>
41
+ <h1 align="center"><code>json2vec</code></h1>
40
42
 
41
43
  <p align="center">
42
44
  <img alt="Python 3.12+" src="https://img.shields.io/badge/python-3.12%2B-3776AB?logo=python&amp;logoColor=white" />
@@ -69,14 +71,14 @@ schemas, and checkpoints private.
69
71
  - **Extensible data types for predictive modeling.** Masked values,
70
72
  targeted fields, and explicit supervised targets all flow through the same
71
73
  datatype-specific heads. A new
72
- [tensorfield type](https://json2vec.github.io/json2vec/guides/tensorfields/) brings its own embedding,
74
+ [tensorfield type](https://json2vec.github.io/json2vec/data-types/tensorfields/) brings its own embedding,
73
75
  decoding, loss, and writing logic, so the framework stays reusable as schemas
74
76
  grow.
75
77
  - **Schema evolution is a first-class workflow.** Between training loops
76
78
  (pretraining, finetuning, refitting, and task adaptation), the model can be
77
79
  mutated. Fields can be added (`model.extend`), removed (`model.delete`),
78
80
  updated (`model.update` / `with model.override`), and reset (`model.reset`).
79
- See the [model update guide](https://json2vec.github.io/json2vec/guides/model-update/).
81
+ See the [mutations guide](https://json2vec.github.io/json2vec/core-concepts/mutations/).
80
82
  - **Production semantics for missingness.** `null`, `padded`, `masked`, and
81
83
  `valued` are distinct states in the tensorfield type system.
82
84
  They are not collapsed into one generic missing-value bucket.
@@ -104,7 +106,7 @@ Use `json2vec` when the hierarchy is part of the signal:
104
106
  multi-target prediction over nested records
105
107
 
106
108
  For more context on the modeling problem, read
107
- [Why JSON2Vec](https://json2vec.github.io/json2vec/motivation/).
109
+ [Why `json2vec`](https://json2vec.github.io/json2vec/motivation/).
108
110
 
109
111
  ## What It Does Not Do
110
112
 
@@ -171,8 +173,8 @@ model = j2v.Model.from_schema(
171
173
  optimizer=lambda module: torch.optim.AdamW(module.parameters(), lr=1e-2),
172
174
  )
173
175
 
174
- datamodule = j2v.PolarsDataModule.from_model(
175
- model,
176
+ datamodule = j2v.PolarsDataModule(
177
+ model=model,
176
178
  train=records,
177
179
  validate=records,
178
180
  num_workers=0,
@@ -195,14 +197,11 @@ trainer = lit.Trainer(
195
197
 
196
198
  trainer.fit(model=model, datamodule=datamodule)
197
199
 
198
- batch = [[record] for record in records.to_dicts()[:3]]
199
-
200
- pprint(model.predict(batch))
201
- pprint(model.embed(batch))
200
+ pprint(model.predict(records.to_dicts()[:3]))
202
201
  ```
203
202
 
204
- The prediction call returns a typed result for `record/species`. The embedding
205
- call returns the configured `record` embedding for each input observation.
203
+ The prediction call returns a typed result for `record/species` and the
204
+ configured `record` embedding for each input observation.
206
205
 
207
206
  ## Documentation
208
207
 
@@ -216,16 +215,20 @@ uv run --extra docs mkdocs build --strict
216
215
  Useful entry points:
217
216
 
218
217
  - [Getting Started](https://json2vec.github.io/json2vec/getting-started/)
219
- - [Why JSON2Vec](https://json2vec.github.io/json2vec/motivation/)
220
- - [Schemas & Queries](https://json2vec.github.io/json2vec/guides/model-schemas/)
221
- - [Model Updates](https://json2vec.github.io/json2vec/guides/model-update/)
218
+ - [AI Quickstart](https://json2vec.github.io/json2vec/ai-quickstart/)
219
+ - [Why `json2vec`](https://json2vec.github.io/json2vec/motivation/)
220
+ - [Query Paths](https://json2vec.github.io/json2vec/core-concepts/querypaths/)
221
+ - [Built-In Data Types](https://json2vec.github.io/json2vec/core-concepts/data-types/)
222
+ - [Learning Modes & Embeddings](https://json2vec.github.io/json2vec/core-concepts/embeddings/)
223
+ - [Model Tree](https://json2vec.github.io/json2vec/core-concepts/model-tree/)
224
+ - [Mutations](https://json2vec.github.io/json2vec/core-concepts/mutations/)
222
225
  - [Hello World](https://json2vec.github.io/json2vec/tutorials/hello-world/)
223
- - [Masked Pretraining](https://json2vec.github.io/json2vec/tutorials/pretraining/)
224
226
  - [Nested Supervised Training](https://json2vec.github.io/json2vec/tutorials/nested-supervised-training/)
227
+ - [Masked Pretraining](https://json2vec.github.io/json2vec/tutorials/pretraining/)
225
228
  - [Supervised Tabular Training](https://json2vec.github.io/json2vec/tutorials/supervised-tabular-training/)
226
- - [Field Ablation](https://json2vec.github.io/json2vec/guides/field-ablation/)
229
+ - [Field Importance](https://json2vec.github.io/json2vec/guides/field-importance/)
227
230
  - [Preprocessors](https://json2vec.github.io/json2vec/guides/preprocessors/)
228
- - [Tensorfield Extensions](https://json2vec.github.io/json2vec/guides/tensorfields/)
231
+ - [Custom Data Types](https://json2vec.github.io/json2vec/data-types/tensorfields/)
229
232
  - [Serving](https://json2vec.github.io/json2vec/tutorials/serving/)
230
233
  - [API Reference](https://json2vec.github.io/json2vec/reference/api/)
231
234
  - [Whitepaper](https://json2vec.github.io/json2vec/whitepaper.pdf)
@@ -292,7 +295,7 @@ Configured `dataset.kwargs` are passed into the preprocessor, with unsupported k
292
295
 
293
296
  Each tensorfield plugin provides a request schema plus the model components
294
297
  needed to encode values, decode predictions, compute losses, and optionally
295
- serialize outputs. See [Tensorfield Extensions](https://json2vec.github.io/json2vec/guides/tensorfields/)
298
+ serialize outputs. See [Custom Data Types](https://json2vec.github.io/json2vec/data-types/tensorfields/)
296
299
  for a custom plugin walkthrough. Built-in tensorfields share the base leaf
297
300
  options `name`, `query`, `pooling`, `weight`, `n_heads`, `n_linear`, `dropout`,
298
301
  `p_mask`, and `p_prune`.
@@ -1,8 +1,8 @@
1
1
  <p align="center">
2
- <img src="https://json2vec.github.io/json2vec/diagrams/json2vec.png" alt="JSON2Vec logo" width="180">
2
+ <img src="https://json2vec.github.io/json2vec/diagrams/json2vec.png" alt="json2vec logo" width="180">
3
3
  </p>
4
4
 
5
- <h1 align="center">JSON2Vec</h1>
5
+ <h1 align="center"><code>json2vec</code></h1>
6
6
 
7
7
  <p align="center">
8
8
  <img alt="Python 3.12+" src="https://img.shields.io/badge/python-3.12%2B-3776AB?logo=python&amp;logoColor=white" />
@@ -35,14 +35,14 @@ schemas, and checkpoints private.
35
35
  - **Extensible data types for predictive modeling.** Masked values,
36
36
  targeted fields, and explicit supervised targets all flow through the same
37
37
  datatype-specific heads. A new
38
- [tensorfield type](https://json2vec.github.io/json2vec/guides/tensorfields/) brings its own embedding,
38
+ [tensorfield type](https://json2vec.github.io/json2vec/data-types/tensorfields/) brings its own embedding,
39
39
  decoding, loss, and writing logic, so the framework stays reusable as schemas
40
40
  grow.
41
41
  - **Schema evolution is a first-class workflow.** Between training loops
42
42
  (pretraining, finetuning, refitting, and task adaptation), the model can be
43
43
  mutated. Fields can be added (`model.extend`), removed (`model.delete`),
44
44
  updated (`model.update` / `with model.override`), and reset (`model.reset`).
45
- See the [model update guide](https://json2vec.github.io/json2vec/guides/model-update/).
45
+ See the [mutations guide](https://json2vec.github.io/json2vec/core-concepts/mutations/).
46
46
  - **Production semantics for missingness.** `null`, `padded`, `masked`, and
47
47
  `valued` are distinct states in the tensorfield type system.
48
48
  They are not collapsed into one generic missing-value bucket.
@@ -70,7 +70,7 @@ Use `json2vec` when the hierarchy is part of the signal:
70
70
  multi-target prediction over nested records
71
71
 
72
72
  For more context on the modeling problem, read
73
- [Why JSON2Vec](https://json2vec.github.io/json2vec/motivation/).
73
+ [Why `json2vec`](https://json2vec.github.io/json2vec/motivation/).
74
74
 
75
75
  ## What It Does Not Do
76
76
 
@@ -137,8 +137,8 @@ model = j2v.Model.from_schema(
137
137
  optimizer=lambda module: torch.optim.AdamW(module.parameters(), lr=1e-2),
138
138
  )
139
139
 
140
- datamodule = j2v.PolarsDataModule.from_model(
141
- model,
140
+ datamodule = j2v.PolarsDataModule(
141
+ model=model,
142
142
  train=records,
143
143
  validate=records,
144
144
  num_workers=0,
@@ -161,14 +161,11 @@ trainer = lit.Trainer(
161
161
 
162
162
  trainer.fit(model=model, datamodule=datamodule)
163
163
 
164
- batch = [[record] for record in records.to_dicts()[:3]]
165
-
166
- pprint(model.predict(batch))
167
- pprint(model.embed(batch))
164
+ pprint(model.predict(records.to_dicts()[:3]))
168
165
  ```
169
166
 
170
- The prediction call returns a typed result for `record/species`. The embedding
171
- call returns the configured `record` embedding for each input observation.
167
+ The prediction call returns a typed result for `record/species` and the
168
+ configured `record` embedding for each input observation.
172
169
 
173
170
  ## Documentation
174
171
 
@@ -182,16 +179,20 @@ uv run --extra docs mkdocs build --strict
182
179
  Useful entry points:
183
180
 
184
181
  - [Getting Started](https://json2vec.github.io/json2vec/getting-started/)
185
- - [Why JSON2Vec](https://json2vec.github.io/json2vec/motivation/)
186
- - [Schemas & Queries](https://json2vec.github.io/json2vec/guides/model-schemas/)
187
- - [Model Updates](https://json2vec.github.io/json2vec/guides/model-update/)
182
+ - [AI Quickstart](https://json2vec.github.io/json2vec/ai-quickstart/)
183
+ - [Why `json2vec`](https://json2vec.github.io/json2vec/motivation/)
184
+ - [Query Paths](https://json2vec.github.io/json2vec/core-concepts/querypaths/)
185
+ - [Built-In Data Types](https://json2vec.github.io/json2vec/core-concepts/data-types/)
186
+ - [Learning Modes & Embeddings](https://json2vec.github.io/json2vec/core-concepts/embeddings/)
187
+ - [Model Tree](https://json2vec.github.io/json2vec/core-concepts/model-tree/)
188
+ - [Mutations](https://json2vec.github.io/json2vec/core-concepts/mutations/)
188
189
  - [Hello World](https://json2vec.github.io/json2vec/tutorials/hello-world/)
189
- - [Masked Pretraining](https://json2vec.github.io/json2vec/tutorials/pretraining/)
190
190
  - [Nested Supervised Training](https://json2vec.github.io/json2vec/tutorials/nested-supervised-training/)
191
+ - [Masked Pretraining](https://json2vec.github.io/json2vec/tutorials/pretraining/)
191
192
  - [Supervised Tabular Training](https://json2vec.github.io/json2vec/tutorials/supervised-tabular-training/)
192
- - [Field Ablation](https://json2vec.github.io/json2vec/guides/field-ablation/)
193
+ - [Field Importance](https://json2vec.github.io/json2vec/guides/field-importance/)
193
194
  - [Preprocessors](https://json2vec.github.io/json2vec/guides/preprocessors/)
194
- - [Tensorfield Extensions](https://json2vec.github.io/json2vec/guides/tensorfields/)
195
+ - [Custom Data Types](https://json2vec.github.io/json2vec/data-types/tensorfields/)
195
196
  - [Serving](https://json2vec.github.io/json2vec/tutorials/serving/)
196
197
  - [API Reference](https://json2vec.github.io/json2vec/reference/api/)
197
198
  - [Whitepaper](https://json2vec.github.io/json2vec/whitepaper.pdf)
@@ -258,7 +259,7 @@ Configured `dataset.kwargs` are passed into the preprocessor, with unsupported k
258
259
 
259
260
  Each tensorfield plugin provides a request schema plus the model components
260
261
  needed to encode values, decode predictions, compute losses, and optionally
261
- serialize outputs. See [Tensorfield Extensions](https://json2vec.github.io/json2vec/guides/tensorfields/)
262
+ serialize outputs. See [Custom Data Types](https://json2vec.github.io/json2vec/data-types/tensorfields/)
262
263
  for a custom plugin walkthrough. Built-in tensorfields share the base leaf
263
264
  options `name`, `query`, `pooling`, `weight`, `n_heads`, `n_linear`, `dropout`,
264
265
  `p_mask`, and `p_prune`.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "json2vec"
3
- version = "0.4.5"
3
+ version = "0.4.6"
4
4
  description = "{...} -> [*]"
5
5
  readme = "README.md"
6
6
  license = "Apache-2.0"
@@ -31,9 +31,11 @@ text = [
31
31
  "transformers>=4.55.0",
32
32
  ]
33
33
  docs = [
34
+ "litserve>=0.2.13",
34
35
  "mkdocs-material>=9.6",
35
36
  "mkdocs-jupyter>=0.26.3",
36
37
  "mkdocstrings[python]>=0.27",
38
+ "pydantic-settings>=2.10.1",
37
39
  ]
38
40
 
39
41
  [dependency-groups]
@@ -1,4 +1,4 @@
1
- """Public JSON2Vec SDK surface.
1
+ """Public `json2vec` SDK surface.
2
2
 
3
3
  The top-level package exports the constructors and helpers used by most
4
4
  applications: `Model.from_schema(...)` for model construction, tensorfield
@@ -6,6 +6,8 @@ request constructors such as `Category` and `Number`, data modules, schema
6
6
  mutation predicates, and the `@preprocess` decorator.
7
7
  """
8
8
 
9
+ from typing import TYPE_CHECKING, Any
10
+
9
11
  from json2vec.architecture.root import (
10
12
  Model,
11
13
  MutationLockCallback,
@@ -15,6 +17,7 @@ from json2vec.architecture.root import (
15
17
  SchedulerConfig,
16
18
  )
17
19
  from json2vec.data.datasets import PolarsDataModule, StreamingDataModule
20
+ from json2vec.inference.callback import Postprocessor, Writer
18
21
  from json2vec.preprocessors import PREPROCESSORS, Preprocessor, PreprocessorMode, preprocess
19
22
  from json2vec.structs.enums import AttentionMode, Component, Metric, ShardingStrategy, Strata, Suffix, TensorKey, Tokens
20
23
  from json2vec.structs.experiment import (
@@ -37,20 +40,73 @@ from json2vec.tensorfields.extensions.text import Request as Text
37
40
  from json2vec.tensorfields.extensions.vector import Request as Vector
38
41
  from json2vec.tensorfields.shared.vocabulary import VocabularySyncCallback
39
42
 
43
+ if TYPE_CHECKING:
44
+ from json2vec.inference.deployment import (
45
+ API,
46
+ Accelerator,
47
+ BatchItem,
48
+ Deployment,
49
+ ErrorItem,
50
+ Input,
51
+ ModelSource,
52
+ UpdateOperation,
53
+ )
54
+
55
+ _SERVING_EXPORTS = {
56
+ "API",
57
+ "Accelerator",
58
+ "BatchItem",
59
+ "Deployment",
60
+ "ErrorItem",
61
+ "Input",
62
+ "ModelSource",
63
+ "UpdateOperation",
64
+ }
65
+
66
+
67
+ def __getattr__(name: str) -> Any:
68
+ if name not in _SERVING_EXPORTS:
69
+ raise AttributeError(f"module 'json2vec' has no attribute {name!r}")
70
+
71
+ try:
72
+ from json2vec.inference import deployment
73
+ except ModuleNotFoundError as error:
74
+ if error.name in {"litserve", "pydantic_settings"}:
75
+ raise ModuleNotFoundError(
76
+ f"json2vec.{name} requires the serving extra; install with `pip install json2vec[serving]`."
77
+ ) from error
78
+ raise
79
+
80
+ value = getattr(deployment, name)
81
+ globals()[name] = value
82
+ return value
83
+
84
+
85
+ def __dir__() -> list[str]:
86
+ return sorted([*globals(), *_SERVING_EXPORTS])
87
+
88
+
40
89
  __all__ = [
41
90
  "Address",
91
+ "API",
92
+ "Accelerator",
42
93
  "Array",
43
94
  "AttentionMode",
95
+ "BatchItem",
44
96
  "Category",
45
97
  "Component",
46
98
  "DateParts",
47
99
  "DecoderBase",
100
+ "Deployment",
48
101
  "EmbedderBase",
49
102
  "Entity",
103
+ "ErrorItem",
50
104
  "Hyperparameters",
105
+ "Input",
51
106
  "Leaf",
52
107
  "Metric",
53
108
  "Model",
109
+ "ModelSource",
54
110
  "MutationLockCallback",
55
111
  "NodeAttribute",
56
112
  "NodePredicate",
@@ -59,6 +115,7 @@ __all__ = [
59
115
  "PREPROCESSORS",
60
116
  "Plugin",
61
117
  "PolarsDataModule",
118
+ "Postprocessor",
62
119
  "Preprocessor",
63
120
  "PreprocessorMode",
64
121
  "RequestBase",
@@ -76,8 +133,10 @@ __all__ = [
76
133
  "TensorKey",
77
134
  "Text",
78
135
  "Tokens",
136
+ "UpdateOperation",
79
137
  "Vector",
80
138
  "VocabularySyncCallback",
139
+ "Writer",
81
140
  "predicate",
82
141
  "preprocess",
83
142
  "where",
@@ -1,4 +1,4 @@
1
- """Checkpoint serialization helpers for JSON2Vec models."""
1
+ """Checkpoint serialization helpers for `json2vec` models."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -76,7 +76,7 @@ class ArrayEncoder(torch.nn.Module):
76
76
  self.encoder = torch.nn.ModuleList(layers)
77
77
 
78
78
  self.pool = LearnedQueryCrossAttention(
79
- n_context=array.n_outputs,
79
+ n_context=1,
80
80
  d_model=hyperparameters.d_model,
81
81
  nhead=array.n_heads,
82
82
  dropout=dropout,
@@ -88,7 +88,7 @@ def render_schema_plot(
88
88
  ) -> RenderableType:
89
89
  hyperparameters = module.hyperparameters
90
90
  root = hyperparameters.fields if address is None else resolve_node(hyperparameters=hyperparameters, address=address)
91
- title = "JSON2Vec State" if state_focus else "JSON2Vec Schema"
91
+ title = "json2vec State" if state_focus else "json2vec Schema"
92
92
 
93
93
  tree = Tree(render_node_label(module=module, node=root, state_focus=state_focus), guide_style="dim")
94
94
  append_schema_children(tree=tree, module=module, node=root, detail=detail or state_focus, state_focus=state_focus)
@@ -252,7 +252,6 @@ def node_metadata_keys(node: Node, values: dict[str, Any], state_focus: bool) ->
252
252
  "d_model",
253
253
  "attention",
254
254
  "max_length",
255
- "n_outputs",
256
255
  "n_layers",
257
256
  "n_heads",
258
257
  "batch_size",
@@ -267,7 +266,7 @@ def node_metadata_keys(node: Node, values: dict[str, Any], state_focus: bool) ->
267
266
  elif isinstance(node, Leaf):
268
267
  preferred = ["query", "pooling", "max_vocab_size", "topk", "objective", "weight"]
269
268
  else:
270
- preferred = ["attention", "max_length", "n_outputs", "n_layers", "n_heads"]
269
+ preferred = ["attention", "max_length", "n_layers", "n_heads"]
271
270
 
272
271
  remaining = [key for key in values if key not in preferred]
273
272
  return preferred + remaining
@@ -1,4 +1,4 @@
1
- """Public Lightning model facade for JSON2Vec schemas."""
1
+ """Public Lightning model facade for `json2vec` schemas."""
2
2
 
3
3
  from collections import Counter
4
4
  from collections.abc import Callable, Iterator, Sequence
@@ -20,7 +20,7 @@ from json2vec.architecture.contracts import ContractScheduler
20
20
  from json2vec.architecture.graph import ModelGraph
21
21
  from json2vec.architecture.mutations import SchemaEditor
22
22
  from json2vec.architecture.plot import PlotMode
23
- from json2vec.architecture.runtime import EvaluationResult, ModelRuntime, Postprocessor, PreprocessFn, step
23
+ from json2vec.architecture.runtime import ModelRuntime, Postprocessor, Preprocessor, step
24
24
  from json2vec.data.datasets.base import EncodedBatch, EncodedInput
25
25
  from json2vec.structs.enums import AttentionMode, Strata
26
26
  from json2vec.structs.experiment import (
@@ -30,7 +30,7 @@ from json2vec.structs.experiment import (
30
30
  SchemaField,
31
31
  )
32
32
  from json2vec.structs.packages import Prediction
33
- from json2vec.structs.tree import Address, Node, PruneRate, Rate
33
+ from json2vec.structs.tree import Address, Node, Rate
34
34
  from json2vec.tensorfields.base import TENSORFIELDS, Plugin, TensorFieldBase
35
35
 
36
36
  OptimizerConfig = torch.optim.Optimizer | Callable[["Model"], torch.optim.Optimizer]
@@ -138,11 +138,11 @@ class RollbackCheckpoint(ModelCheckpoint):
138
138
 
139
139
 
140
140
  class Model(lit.LightningModule):
141
- """Neural model generated from a JSON2Vec schema tree.
141
+ """Neural model generated from a `json2vec` schema tree.
142
142
 
143
143
  `Model` owns the schema hyperparameters, tensorfield embedders, array
144
- encoders, decoders, and convenience methods for prediction, embedding,
145
- checkpointing, plotting, and schema mutation.
144
+ encoders, decoders, and convenience methods for prediction, checkpointing,
145
+ plotting, and schema mutation.
146
146
 
147
147
  Example:
148
148
  ```python
@@ -169,16 +169,13 @@ class Model(lit.LightningModule):
169
169
  n_heads: int,
170
170
  batch_size: int = 1,
171
171
  fields: Sequence[SchemaField] | None = None,
172
- root: str = "record",
172
+ name: str = "record",
173
173
  description: str | None = None,
174
174
  embed: bool = False,
175
175
  attention: AttentionMode | str = AttentionMode.mha,
176
176
  max_length: int = 1,
177
- n_outputs: int = 1,
178
177
  n_linear: int = 1,
179
178
  dropout: Rate | None = None,
180
- p_mask: Rate | None = None,
181
- p_prune: PruneRate | None = None,
182
179
  optimizer: OptimizerConfig | None = None,
183
180
  scheduler: SchedulerConfig | None = None,
184
181
  ) -> Self:
@@ -193,16 +190,13 @@ class Model(lit.LightningModule):
193
190
  batch_size: Batch size used by data modules, examples, and mocked
194
191
  Lightning input arrays.
195
192
  fields: Optional sequence form of `field_args`.
196
- root: Root array name. Defaults to `record`.
193
+ name: Root array name. Defaults to `record`.
197
194
  description: Optional description on the generated root array.
198
195
  embed: Configure the generated root array as an embedding output.
199
196
  attention: Attention mode for the generated root array.
200
197
  max_length: Maximum number of records per observation at the root.
201
- n_outputs: Number of pooled outputs emitted by the generated root array.
202
198
  n_linear: Feed-forward block count on the generated root array.
203
199
  dropout: Optional dropout rate on the generated root array.
204
- p_mask: Optional mask rate on the generated root array.
205
- p_prune: Optional prune rate on the generated root array.
206
200
  optimizer: Optimizer instance or factory used by Lightning training.
207
201
  scheduler: Optional scheduler config or factory.
208
202
 
@@ -215,16 +209,13 @@ class Model(lit.LightningModule):
215
209
  n_layers=n_layers,
216
210
  n_heads=n_heads,
217
211
  fields=fields,
218
- root=root,
212
+ name=name,
219
213
  description=description,
220
214
  embed=embed,
221
215
  attention=attention,
222
216
  max_length=max_length,
223
- n_outputs=n_outputs,
224
217
  n_linear=n_linear,
225
218
  dropout=dropout,
226
- p_mask=p_mask,
227
- p_prune=p_prune,
228
219
  )
229
220
  return cls(
230
221
  hyperparameters=hyperparameters,
@@ -299,7 +290,7 @@ class Model(lit.LightningModule):
299
290
  """Mutate selected schema nodes and rebuild compatible modules.
300
291
 
301
292
  `target=True` is shorthand for `p_prune=1.0`; `target=False` clears
302
- target behavior by setting `p_prune=None`.
293
+ target behavior by setting `p_prune=0.0`.
303
294
 
304
295
  Args:
305
296
  *predicates: Predicates used to select nodes.
@@ -507,7 +498,7 @@ class Model(lit.LightningModule):
507
498
  CheckpointState.dump(self, checkpoint)
508
499
 
509
500
  def restore_checkpoint_state(self, checkpoint: dict[str, Any]) -> None:
510
- """Restore this model in place from a JSON2Vec checkpoint dictionary."""
501
+ """Restore this model in place from a `json2vec` checkpoint dictionary."""
511
502
  CheckpointState.restore(self, checkpoint)
512
503
 
513
504
  @classmethod
@@ -515,16 +506,14 @@ class Model(lit.LightningModule):
515
506
  """Load a `Model` checkpoint written by `Model.save(...)`."""
516
507
  return cast(Self, CheckpointState.load(cls, checkpoint))
517
508
 
518
- def write(
519
- self, predictions: list[Prediction]
520
- ) -> tuple[dict[Address, dict[str, Any]], dict[Address, dict[str, Any]]]:
509
+ def write(self, predictions: list[Prediction]) -> dict[Address, dict[str, Any]]:
521
510
  return ModelRuntime.write(self, predictions)
522
511
 
523
512
  @immutable("inference")
524
513
  def encode(
525
514
  self,
526
515
  batch: EncodedBatch | list[dict[str, Any]],
527
- preprocess: PreprocessFn | None = None,
516
+ preprocess: Preprocessor | None = None,
528
517
  strata: Strata | str = Strata.predict,
529
518
  ) -> EncodedInput:
530
519
  """Return encoded tensorfield inputs for raw or processed observations."""
@@ -536,52 +525,19 @@ class Model(lit.LightningModule):
536
525
  )
537
526
 
538
527
  @immutable("inference")
539
- def evaluate(
540
- self,
541
- batch: EncodedBatch | list[dict[str, Any]],
542
- preprocess: PreprocessFn | None = None,
543
- postprocess: Postprocessor | None = None,
544
- ) -> EvaluationResult:
545
- """Run prediction and embedding for encoded or raw observations.
546
-
547
- If `preprocess` is omitted, raw records are encoded unchanged.
548
- """
549
- return ModelRuntime.evaluate(
550
- self,
551
- batch=batch,
552
- preprocess=preprocess,
553
- postprocess=postprocess,
554
- )
555
-
556
528
  def predict(
557
529
  self,
558
530
  batch: EncodedBatch | list[dict[str, Any]],
559
- preprocess: PreprocessFn | None = None,
531
+ preprocess: Preprocessor | None = None,
560
532
  postprocess: Postprocessor | None = None,
561
533
  ) -> dict[Address, dict[str, Any]]:
562
- """Return typed predictions for a raw or encoded batch."""
563
-
564
- result = self.evaluate(
565
- batch=batch,
566
- preprocess=preprocess,
567
- postprocess=postprocess,
568
- )
569
-
570
- return result.predictions
571
-
572
- def embed(
573
- self,
574
- batch: EncodedBatch | list[dict[str, Any]],
575
- preprocess: PreprocessFn | None = None,
576
- postprocess: Postprocessor | None = None,
577
- ) -> dict[Address, dict[str, Any]]:
578
- """Return configured embeddings for a raw or encoded batch."""
579
- result = self.evaluate(
534
+ """Return typed predictions and configured embeddings for a raw or encoded batch."""
535
+ return ModelRuntime.predict(
536
+ self,
580
537
  batch=batch,
581
538
  preprocess=preprocess,
582
539
  postprocess=postprocess,
583
540
  )
584
- return result.embeddings
585
541
 
586
542
  training_step = partialmethod(step, strata=Strata.train)
587
543
  validation_step = partialmethod(step, strata=Strata.validate)