bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/data/__init__.py ADDED
@@ -0,0 +1,65 @@
1
+ """Data infrastructure.
2
+
3
+ Provides core data models, identifiers, timestamps, serialization,
4
+ metadata tracking, repository pattern, and validation utilities.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from bead.data.base import BeadBaseModel
10
+ from bead.data.identifiers import extract_timestamp, generate_uuid, is_valid_uuid7
11
+ from bead.data.metadata import (
12
+ MetadataTracker,
13
+ ProcessingRecord,
14
+ ProvenanceRecord,
15
+ )
16
+ from bead.data.range import Range
17
+ from bead.data.repository import Repository
18
+ from bead.data.serialization import (
19
+ DeserializationError,
20
+ SerializationError,
21
+ append_jsonlines,
22
+ read_jsonlines,
23
+ stream_jsonlines,
24
+ write_jsonlines,
25
+ )
26
+ from bead.data.timestamps import format_iso8601, now_iso8601, parse_iso8601
27
+ from bead.data.validation import (
28
+ ValidationReport,
29
+ validate_jsonlines_file,
30
+ validate_provenance_chain,
31
+ validate_uuid_references,
32
+ )
33
+
34
# Public API of ``bead.data``: every name imported above is re-exported here,
# grouped by the submodule it comes from.
__all__ = [
    # Base model (bead.data.base)
    "BeadBaseModel",
    # Identifiers (bead.data.identifiers)
    "generate_uuid",
    "extract_timestamp",
    "is_valid_uuid7",
    # Range (bead.data.range)
    "Range",
    # Timestamps (bead.data.timestamps)
    "now_iso8601",
    "parse_iso8601",
    "format_iso8601",
    # Serialization (bead.data.serialization)
    "write_jsonlines",
    "read_jsonlines",
    "stream_jsonlines",
    "append_jsonlines",
    "SerializationError",
    "DeserializationError",
    # Metadata (bead.data.metadata)
    "MetadataTracker",
    "ProvenanceRecord",
    "ProcessingRecord",
    # Repository (bead.data.repository)
    "Repository",
    # Validation (bead.data.validation)
    "ValidationReport",
    "validate_jsonlines_file",
    "validate_uuid_references",
    "validate_provenance_chain",
]
bead/data/base.py ADDED
@@ -0,0 +1,87 @@
1
+ """Base Pydantic model for all bead objects.
2
+
3
+ This module provides BeadBaseModel, the foundational Pydantic v2 model that all
4
+ bead data models should inherit from. It provides automatic ID generation,
5
+ timestamp tracking, and versioning.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from datetime import datetime
11
+ from uuid import UUID
12
+
13
+ from pydantic import BaseModel, ConfigDict, Field
14
+
15
+ from bead.data.identifiers import generate_uuid
16
+ from bead.data.timestamps import now_iso8601
17
+
18
+ # Type alias for JSON-serializable values (recursive type)
19
+ type JsonValue = (
20
+ str | int | float | bool | None | list[JsonValue] | dict[str, JsonValue]
21
+ )
22
+
23
+
24
class BeadBaseModel(BaseModel):
    """Common base class for every bead data model.

    Subclasses inherit an auto-generated identifier, creation and
    modification timestamps, a schema version string, and a free-form
    metadata dictionary. Unknown fields are rejected and assignments to
    existing fields are re-validated.

    Attributes
    ----------
    id : UUID
        Identifier produced by ``generate_uuid`` when none is supplied.
    created_at : datetime
        Timestamp produced by ``now_iso8601`` at construction time.
    modified_at : datetime
        Timestamp of the last recorded modification; starts out as a
        separate ``now_iso8601`` call made at construction time.
    version : str
        Schema version string (default: "1.0.0").
    metadata : dict[str, JsonValue]
        Arbitrary JSON-compatible key-value pairs (default: empty dict).

    Examples
    --------
    >>> class MyModel(BeadBaseModel):
    ...     name: str
    ...     value: int
    >>> obj = MyModel(name="test", value=42)
    >>> obj.id  # doctest: +SKIP
    UUID('...')
    >>> obj.version
    '1.0.0'
    >>> obj.update_modified_time()
    >>> obj.modified_at > obj.created_at
    True
    """

    model_config = ConfigDict(
        extra="forbid",  # reject fields the model does not declare
        frozen=False,  # instances remain mutable after construction
        validate_assignment=True,  # re-run validation on attribute assignment
    )

    id: UUID = Field(default_factory=generate_uuid)
    created_at: datetime = Field(default_factory=now_iso8601)
    modified_at: datetime = Field(default_factory=now_iso8601)
    version: str = "1.0.0"
    metadata: dict[str, JsonValue] = Field(default_factory=dict)

    def update_modified_time(self) -> None:
        """Stamp ``modified_at`` with the current time.

        Call this after changing the object so that the modification
        timestamp stays accurate; nothing in this class calls it
        automatically.

        Examples
        --------
        >>> obj = BeadBaseModel()
        >>> original_time = obj.modified_at
        >>> import time
        >>> time.sleep(0.01)  # make sure the clock has advanced
        >>> obj.update_modified_time()
        >>> obj.modified_at > original_time
        True
        """
        refreshed = now_iso8601()
        self.modified_at = refreshed
bead/data/identifiers.py ADDED
@@ -0,0 +1,97 @@
1
+ """UUIDv7 generation and utilities for bead package.
2
+
3
+ This module provides functions for generating time-ordered UUIDv7 identifiers,
4
+ extracting timestamps from them, and validating UUID versions.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from uuid import UUID
10
+
11
+ import uuid_utils
12
+
13
+
14
def generate_uuid() -> UUID:
    """Create a new UUIDv7 as a standard-library ``UUID``.

    UUIDv7 places a millisecond timestamp in its first 48 bits, so
    identifiers sort roughly by creation time, which is convenient for
    keeping records in chronological order.

    Returns
    -------
    UUID
        A freshly generated UUIDv7.

    Examples
    --------
    >>> uuid1 = generate_uuid()
    >>> uuid2 = generate_uuid()
    >>> uuid1 < uuid2  # uuids are time-ordered
    True
    """
    raw = uuid_utils.uuid7()
    # Re-wrap as the stdlib UUID type (rather than uuid_utils.UUID) for
    # Pydantic compatibility.
    return UUID(hex=str(raw))
36
+
37
+
38
def extract_timestamp(uuid: UUID) -> int:
    """Return the millisecond timestamp embedded in a UUIDv7.

    The leading 48 bits of the UUID are interpreted as a big-endian
    unsigned integer: milliseconds since the Unix epoch
    (January 1, 1970 00:00:00 UTC) for a UUIDv7. The UUID version is not
    checked here.

    Parameters
    ----------
    uuid
        The UUIDv7 to read the timestamp from.

    Returns
    -------
    int
        Timestamp in milliseconds since Unix epoch.

    Examples
    --------
    >>> import time
    >>> uuid = generate_uuid()
    >>> timestamp = extract_timestamp(uuid)
    >>> current_time = int(time.time() * 1000)
    >>> abs(timestamp - current_time) < 1000  # within 1 second
    True
    """
    # A UUID is 128 bits wide; dropping the low 80 bits leaves exactly the
    # top 48 bits, i.e. the first 6 bytes read in big-endian order.
    low_bits = 128 - 48
    return uuid.int >> low_bits
69
+
70
+
71
def is_valid_uuid7(uuid: UUID) -> bool:
    """Report whether a UUID carries version number 7.

    Only the version reported by ``UUID.version`` is inspected (the
    version bits, bits 48-51, must read 0111); the embedded timestamp is
    not examined.

    Parameters
    ----------
    uuid
        The UUID to validate.

    Returns
    -------
    bool
        True if the UUID is version 7, False otherwise.

    Examples
    --------
    >>> uuid7 = generate_uuid()
    >>> is_valid_uuid7(uuid7)
    True
    >>> from uuid import uuid4
    >>> uuid4_val = uuid4()
    >>> is_valid_uuid7(uuid4_val)
    False
    """
    version_number = uuid.version
    return version_number == 7
bead/data/language_codes.py ADDED
@@ -0,0 +1,61 @@
1
+ """ISO 639 language code validation and utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Annotated
6
+
7
+ from langcodes import Language
8
+ from langcodes.tag_parser import LanguageTagError
9
+ from pydantic import AfterValidator, Field
10
+
11
+
12
def validate_iso639_code(code: str | None) -> str | None:
    """Check a language code and normalize it to its three-letter form.

    Parameters
    ----------
    code
        Language code to validate (e.g., "en", "eng", "ko", "kor"), or
        None when no language is specified.

    Returns
    -------
    str | None
        The three-letter code returned by ``Language.to_alpha3`` for a
        valid input, or None when ``code`` is None.

    Raises
    ------
    ValueError
        If the code cannot be parsed or has no three-letter equivalent.

    Examples
    --------
    >>> validate_iso639_code("en")
    'eng'
    >>> validate_iso639_code("eng")
    'eng'
    >>> validate_iso639_code("ko")
    'kor'
    >>> validate_iso639_code(None) is None
    True
    >>> validate_iso639_code("invalid")
    Traceback (most recent call last):
        ...
    ValueError: Invalid language code: 'invalid'
    """
    if code is not None:
        try:
            # parse the tag, then normalize to the three-letter code
            return Language.get(code).to_alpha3()
        except (LanguageTagError, LookupError) as exc:
            raise ValueError(f"Invalid language code: {code!r}") from exc
    # None passes through unchanged
    return None
54
+
55
+
56
# Annotated type alias for optional language-code fields: the value is
# declared as ``str | None`` and is passed through ``validate_iso639_code``
# via pydantic's ``AfterValidator``.
LanguageCode = Annotated[
    str | None,
    AfterValidator(validate_iso639_code),
    Field(description="ISO 639-1 or ISO 639-3 language code"),
]
bead/data/metadata.py ADDED
@@ -0,0 +1,270 @@
1
+ """Metadata tracking models for provenance and processing history.
2
+
3
+ This module provides models for tracking provenance chains and processing history
4
+ for all bead objects. This enables full traceability of data transformations.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime
10
+ from uuid import UUID
11
+
12
+ from pydantic import Field
13
+
14
+ from bead.data.base import BeadBaseModel, JsonValue
15
+ from bead.data.timestamps import now_iso8601
16
+
17
+
18
+ def _empty_provenance_list() -> list[ProvenanceRecord]:
19
+ """Create empty provenance list."""
20
+ return []
21
+
22
+
23
+ def _empty_processing_list() -> list[ProcessingRecord]:
24
+ """Create empty processing list."""
25
+ return []
26
+
27
+
28
class ProvenanceRecord(BeadBaseModel):
    """Record of a provenance relationship between objects.

    Describes one parent-child link in a provenance chain: which object the
    parent was, what type it had, and how the child relates to it. The
    record also carries the fields inherited from ``BeadBaseModel``.

    Attributes
    ----------
    parent_id : UUID
        UUID of the parent object in the provenance chain
    parent_type : str
        Type name of the parent object (e.g., "LexicalItem", "Template")
    relationship : str
        Type of relationship (e.g., "derived_from", "filled_from", "generated_from")
    timestamp : datetime
        When this relationship was established; defaults to the value
        returned by ``now_iso8601`` at construction time

    Examples
    --------
    >>> from uuid import uuid4
    >>> parent_id = uuid4()
    >>> record = ProvenanceRecord(
    ...     parent_id=parent_id,
    ...     parent_type="Template",
    ...     relationship="filled_from"
    ... )
    >>> record.parent_type
    'Template'
    >>> record.timestamp is not None
    True
    """

    # The three relationship fields are required; only the timestamp has a default.
    parent_id: UUID
    parent_type: str
    relationship: str
    timestamp: datetime = Field(default_factory=now_iso8601)
64
+
65
+
66
class ProcessingRecord(BeadBaseModel):
    """Record of a processing operation applied to an object.

    Describes one step in a processing history: the operation name, the
    parameters it was given, when it ran, and who or what ran it. The
    record also carries the fields inherited from ``BeadBaseModel``.

    Attributes
    ----------
    operation : str
        Name of the operation (e.g., "fill_template", "apply_constraint", "filter")
    parameters : dict[str, JsonValue]
        Parameters passed to the operation (default: empty dict)
    timestamp : datetime
        When the operation was performed; defaults to the value returned by
        ``now_iso8601`` at construction time
    operator : str | None
        Who/what performed the operation (e.g., "TemplateFiller-v1.0", user ID)
        (default: None)

    Examples
    --------
    >>> record = ProcessingRecord(
    ...     operation="fill_template",
    ...     parameters={"strategy": "exhaustive", "max_items": 100},
    ...     operator="TemplateFiller-v1.0"
    ... )
    >>> record.operation
    'fill_template'
    >>> record.parameters["strategy"]
    'exhaustive'
    >>> record.timestamp is not None
    True
    """

    # Only ``operation`` is required; every other field has a default.
    operation: str
    parameters: dict[str, JsonValue] = Field(default_factory=dict)
    timestamp: datetime = Field(default_factory=now_iso8601)
    operator: str | None = None
103
+
104
+
105
class MetadataTracker(BeadBaseModel):
    """Metadata tracking for provenance and processing history.

    Tracks both provenance (where data came from) and processing history
    (what operations were applied) for complete data lineage.

    Attributes
    ----------
    provenance : list[ProvenanceRecord]
        Chain of provenance relationships (default: empty list)
    processing_history : list[ProcessingRecord]
        History of processing operations (default: empty list)
    custom_metadata : dict[str, JsonValue]
        Custom metadata fields (default: empty dict)

    Examples
    --------
    >>> from uuid import uuid4
    >>> tracker = MetadataTracker()
    >>> parent_id = uuid4()
    >>> tracker.add_provenance(parent_id, "Template", "filled_from")
    >>> tracker.add_processing("fill_template", {"strategy": "exhaustive"})
    >>> len(tracker.provenance)
    1
    >>> len(tracker.processing_history)
    1
    >>> chain = tracker.get_provenance_chain()
    >>> len(chain)
    1
    """

    provenance: list[ProvenanceRecord] = Field(default_factory=_empty_provenance_list)
    processing_history: list[ProcessingRecord] = Field(
        default_factory=_empty_processing_list
    )
    custom_metadata: dict[str, JsonValue] = Field(default_factory=dict)

    def add_provenance(
        self, parent_id: UUID, parent_type: str, relationship: str
    ) -> None:
        """Add a provenance record to the chain.

        Creates a new provenance record and appends it to the provenance
        list. The record's timestamp comes from its own field default.

        Parameters
        ----------
        parent_id : UUID
            UUID of the parent object
        parent_type : str
            Type name of the parent object (e.g., "Template", "LexicalItem")
        relationship : str
            Type of relationship (e.g., "derived_from", "filled_from")

        Examples
        --------
        >>> from uuid import uuid4
        >>> tracker = MetadataTracker()
        >>> parent_id = uuid4()
        >>> tracker.add_provenance(parent_id, "Template", "filled_from")
        >>> len(tracker.provenance)
        1
        >>> tracker.provenance[0].parent_type
        'Template'
        """
        record = ProvenanceRecord(
            parent_id=parent_id, parent_type=parent_type, relationship=relationship
        )
        self.provenance.append(record)

    def add_processing(
        self,
        operation: str,
        parameters: dict[str, JsonValue] | None = None,
        operator: str | None = None,
    ) -> None:
        """Add a processing record to the history.

        Creates a new processing record and appends it to the processing
        history. The record's timestamp comes from its own field default.

        Parameters
        ----------
        operation : str
            Name of the operation performed
        parameters : dict[str, JsonValue] | None, optional
            Parameters passed to the operation (default: None, which creates empty dict)
        operator : str | None, optional
            Who/what performed the operation (default: None)

        Examples
        --------
        >>> tracker = MetadataTracker()
        >>> tracker.add_processing("fill_template", {"strategy": "exhaustive"})
        >>> len(tracker.processing_history)
        1
        >>> tracker.processing_history[0].operation
        'fill_template'
        >>> tracker.add_processing("filter", operator="FilterSystem-v2.0")
        >>> tracker.processing_history[1].operator
        'FilterSystem-v2.0'
        """
        if parameters is None:
            parameters = {}
        record = ProcessingRecord(
            operation=operation, parameters=parameters, operator=operator
        )
        self.processing_history.append(record)

    def get_provenance_chain(self) -> list[UUID]:
        """Get the full provenance chain as a list of parent UUIDs.

        Returns the parent UUIDs in the order they were added to the
        provenance list.

        Returns
        -------
        list[UUID]
            List of parent UUIDs in insertion order

        Examples
        --------
        >>> from uuid import uuid4
        >>> tracker = MetadataTracker()
        >>> parent1 = uuid4()
        >>> parent2 = uuid4()
        >>> tracker.add_provenance(parent1, "Template", "filled_from")
        >>> tracker.add_provenance(parent2, "LexicalItem", "derived_from")
        >>> chain = tracker.get_provenance_chain()
        >>> len(chain)
        2
        >>> chain[0] == parent1
        True
        """
        return [record.parent_id for record in self.provenance]

    def get_recent_processing(self, n: int = 5) -> list[ProcessingRecord]:
        """Get the N most recent processing records.

        Returns the most recent processing records, up to N records. If there
        are fewer than N records, returns all available records. A
        non-positive N yields an empty list.

        Parameters
        ----------
        n : int, optional
            Number of recent records to return (default: 5)

        Returns
        -------
        list[ProcessingRecord]
            List of up to N most recent processing records, newest first

        Examples
        --------
        >>> tracker = MetadataTracker()
        >>> tracker.add_processing("operation1")
        >>> tracker.add_processing("operation2")
        >>> tracker.add_processing("operation3")
        >>> recent = tracker.get_recent_processing(n=2)
        >>> len(recent)
        2
        >>> recent[0].operation
        'operation3'
        >>> recent[1].operation
        'operation2'
        >>> tracker.get_recent_processing(n=0)
        []
        """
        # Guard the slice: ``history[-0:]`` is the whole list and a negative
        # ``n`` turns ``[-n:]`` into a positive start index, so without this
        # check n <= 0 would return records instead of none.
        if n <= 0:
            return []
        return list(reversed(self.processing_history[-n:]))
bead/data/range.py ADDED
@@ -0,0 +1,123 @@
1
+ """Generic numeric range model with validation.
2
+
3
+ Provides a reusable Range[T] model for representing validated numeric ranges
4
+ with bounds checking, containment testing, and value clamping.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Generic, TypeVar
10
+
11
+ from pydantic import BaseModel, ConfigDict, model_validator
12
+
13
# Constrained type variable: a Range is parameterized by either int or float.
T = TypeVar("T", int, float)
14
+
15
+
16
class Range(BaseModel, Generic[T]):  # noqa: UP046 - Pydantic requires Generic[T]
    """An immutable numeric interval with inclusive endpoints.

    The model is frozen and rejects unknown fields. Construction fails
    unless ``min`` is strictly below ``max``. Instances can test whether a
    value lies inside the interval and clamp values onto it.

    Attributes
    ----------
    min
        Minimum value (inclusive).
    max
        Maximum value (inclusive).

    Examples
    --------
    >>> scale = Range[int](min=1, max=7)
    >>> scale.contains(4)
    True
    >>> scale.contains(0)
    False
    >>> scale.clamp(10)
    7

    >>> probability = Range[float](min=0.0, max=1.0)
    >>> probability.contains(0.5)
    True
    >>> probability.clamp(-0.1)
    0.0
    """

    model_config = ConfigDict(
        extra="forbid",
        frozen=True,
    )

    min: T
    max: T

    @model_validator(mode="after")
    def validate_order(self) -> Range[T]:
        """Reject ranges whose lower bound is not strictly below the upper.

        Returns
        -------
        Range[T]
            The validated range instance.

        Raises
        ------
        ValueError
            If min is greater than or equal to max.
        """
        lower, upper = self.min, self.max
        if lower >= upper:
            message = f"min ({self.min}) must be less than max ({self.max})"
            raise ValueError(message)
        return self

    def contains(self, value: T) -> bool:
        """Tell whether a value lies inside the range, endpoints included.

        Parameters
        ----------
        value
            The value to check.

        Returns
        -------
        bool
            True if min <= value <= max, False otherwise.

        Examples
        --------
        >>> r = Range[int](min=1, max=5)
        >>> r.contains(3)
        True
        >>> r.contains(1)
        True
        >>> r.contains(5)
        True
        >>> r.contains(6)
        False
        """
        at_or_above_lower = self.min <= value
        return at_or_above_lower and value <= self.max

    def clamp(self, value: T) -> T:
        """Pull a value onto the range if it falls outside of it.

        Parameters
        ----------
        value
            The value to clamp.

        Returns
        -------
        T
            The clamped value (min if value < min, max if value > max,
            otherwise the original value).

        Examples
        --------
        >>> r = Range[int](min=1, max=5)
        >>> r.clamp(3)
        3
        >>> r.clamp(0)
        1
        >>> r.clamp(10)
        5
        """
        # Cap at the upper bound first, then raise to the lower bound; on
        # ties the bound itself is returned.
        capped = value if value < self.max else self.max
        return capped if capped > self.min else self.min