@fulmenhq/tsfulmen 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/CHANGELOG.md +42 -0
  2. package/README.md +60 -7
  3. package/config/crucible-ts/agentic/roles/README.md +3 -3
  4. package/config/crucible-ts/library/fulencode/fixtures/README.md +18 -0
  5. package/config/crucible-ts/library/fulencode/fixtures/bom/bom.yaml +14 -0
  6. package/config/crucible-ts/library/fulencode/fixtures/detection/detection.yaml +12 -0
  7. package/config/crucible-ts/library/fulencode/fixtures/invalid-encodings/base64.yaml +10 -0
  8. package/config/crucible-ts/library/fulencode/fixtures/normalization/text-safe.yaml +10 -0
  9. package/config/crucible-ts/library/fulencode/fixtures/telemetry/telemetry-test-cases.yaml +24 -0
  10. package/config/crucible-ts/library/fulencode/fixtures/valid-encodings/base64.yaml +11 -0
  11. package/config/crucible-ts/taxonomy/library/platform-modules/v1.0.0/modules.yaml +2 -2
  12. package/config/crucible-ts/taxonomy/metrics.yaml +79 -1
  13. package/dist/appidentity/index.d.ts +1 -1
  14. package/dist/appidentity/index.js +122 -56
  15. package/dist/appidentity/index.js.map +1 -1
  16. package/dist/config/index.js +122 -56
  17. package/dist/config/index.js.map +1 -1
  18. package/dist/crucible/index.js +122 -56
  19. package/dist/crucible/index.js.map +1 -1
  20. package/dist/errors/index.d.ts +1 -1
  21. package/dist/errors/index.js +122 -56
  22. package/dist/errors/index.js.map +1 -1
  23. package/dist/foundry/index.js +122 -56
  24. package/dist/foundry/index.js.map +1 -1
  25. package/dist/fulencode/index.d.ts +102 -0
  26. package/dist/fulencode/index.js +806 -0
  27. package/dist/fulencode/index.js.map +1 -0
  28. package/dist/index.d.ts +2 -2
  29. package/dist/index.js +123 -57
  30. package/dist/index.js.map +1 -1
  31. package/dist/pathfinder/index.d.ts +1 -1
  32. package/dist/pathfinder/index.js +122 -56
  33. package/dist/pathfinder/index.js.map +1 -1
  34. package/dist/reports/license-inventory.csv +7 -6
  35. package/dist/schema/index.d.ts +2 -2
  36. package/dist/schema/index.js +122 -56
  37. package/dist/schema/index.js.map +1 -1
  38. package/dist/signals/index.js +122 -56
  39. package/dist/signals/index.js.map +1 -1
  40. package/dist/telemetry/http/index.js +122 -56
  41. package/dist/telemetry/http/index.js.map +1 -1
  42. package/dist/telemetry/index.d.ts +1 -1
  43. package/dist/telemetry/index.js +122 -56
  44. package/dist/telemetry/index.js.map +1 -1
  45. package/dist/telemetry/prometheus/index.d.ts +1 -1
  46. package/dist/telemetry/prometheus/index.js +122 -56
  47. package/dist/telemetry/prometheus/index.js.map +1 -1
  48. package/dist/{types-BJswWpQC.d.ts → types-DdoeE7F5.d.ts} +1 -1
  49. package/package.json +7 -2
  50. package/schemas/crucible-ts/library/fulencode/v1.0.0/README.md +37 -0
  51. package/schemas/crucible-ts/library/fulencode/v1.0.0/bom-result.schema.json +48 -0
  52. package/schemas/crucible-ts/library/fulencode/v1.0.0/decode-options.schema.json +60 -0
  53. package/schemas/crucible-ts/library/fulencode/v1.0.0/decoding-result.schema.json +70 -0
  54. package/schemas/crucible-ts/library/fulencode/v1.0.0/detect-options.schema.json +25 -0
  55. package/schemas/crucible-ts/library/fulencode/v1.0.0/detection-result.schema.json +57 -0
  56. package/schemas/crucible-ts/library/fulencode/v1.0.0/encode-options.schema.json +71 -0
  57. package/schemas/crucible-ts/library/fulencode/v1.0.0/encoding-result.schema.json +57 -0
  58. package/schemas/crucible-ts/library/fulencode/v1.0.0/fulencode-config.schema.json +8 -4
  59. package/schemas/crucible-ts/library/fulencode/v1.0.0/fulencode-error.schema.json +66 -0
  60. package/schemas/crucible-ts/library/fulencode/v1.0.0/normalization-result.schema.json +73 -0
  61. package/schemas/crucible-ts/library/fulencode/v1.0.0/normalize-options.schema.json +44 -0
  62. package/schemas/crucible-ts/meta/README.md +38 -2
  63. package/schemas/crucible-ts/meta/draft-04/schema.json +222 -0
  64. package/schemas/crucible-ts/meta/draft-06/schema.json +218 -0
  65. package/schemas/crucible-ts/meta/draft-2019-09/meta/applicator.json +93 -0
  66. package/schemas/crucible-ts/meta/draft-2019-09/meta/content.json +21 -0
  67. package/schemas/crucible-ts/meta/draft-2019-09/meta/core.json +58 -0
  68. package/schemas/crucible-ts/meta/draft-2019-09/meta/format.json +15 -0
  69. package/schemas/crucible-ts/meta/draft-2019-09/meta/meta-data.json +35 -0
  70. package/schemas/crucible-ts/meta/draft-2019-09/meta/validation.json +119 -0
  71. package/schemas/crucible-ts/meta/draft-2019-09/offline.schema.json +148 -0
  72. package/schemas/crucible-ts/meta/draft-2019-09/schema.json +62 -0
  73. package/schemas/crucible-ts/meta/fixtures/draft-04-sample.json +16 -0
  74. package/schemas/crucible-ts/meta/fixtures/draft-06-sample.json +16 -0
  75. package/schemas/crucible-ts/meta/fixtures/draft-07-sample.json +34 -0
  76. package/schemas/crucible-ts/meta/fixtures/draft-2019-09-sample.json +21 -0
  77. package/schemas/crucible-ts/meta/fixtures/draft-2020-12-sample.json +21 -0
  78. package/schemas/crucible-ts/taxonomy/library/fulencode/normalization-profiles/v1.0.0/profiles.yaml +16 -0
  79. package/schemas/crucible-ts/upstream/3leaps/crucible/PROVENANCE.md +64 -0
  80. package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/access-tier.dimension.json +103 -0
  81. package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/retention-lifecycle.dimension.json +103 -0
  82. package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/schema-stability.dimension.json +100 -0
  83. package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/sensitivity.dimension.json +130 -0
  84. package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/velocity-mode.dimension.json +79 -0
  85. package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/volatility.dimension.json +72 -0
  86. package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/volume-tier.dimension.json +66 -0
  87. package/schemas/crucible-ts/upstream/3leaps/crucible/docs/catalog/classifiers/README.md +29 -0
  88. package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/access-tier-classification.md +163 -0
  89. package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/classifiers-framework.md +157 -0
  90. package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/data-sensitivity-classification.md +259 -0
  91. package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/retention-lifecycle-classification.md +200 -0
  92. package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/schema-stability-classification.md +205 -0
  93. package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/velocity-mode-classification.md +222 -0
  94. package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/volatility-classification.md +209 -0
  95. package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/volume-tier-classification.md +200 -0
  96. package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/ailink/v0/README.md +48 -0
  97. package/schemas/crucible-ts/upstream/3leaps/{ailink → crucible/schemas/ailink}/v0/prompt.schema.json +4 -18
  98. package/schemas/crucible-ts/upstream/3leaps/{ailink → crucible/schemas/ailink}/v0/search-response.schema.json +7 -37
  99. package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/classifiers/v0/dimension-definition.schema.json +247 -0
  100. package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/classifiers/v0/sensitivity-level.schema.json +67 -0
  101. package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/error-response.schema.json +59 -0
  102. package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/lifecycle-phases.data.json +102 -0
  103. package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/lifecycle-phases.schema.json +101 -0
  104. package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/release-phase.schema.json +18 -0
  105. package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/types.schema.json +177 -0
  106. package/schemas/crucible-ts/upstream/3leaps/PROVENANCE.md +0 -43
  107. /package/schemas/crucible-ts/upstream/3leaps/{agentic → crucible/schemas/agentic}/v0/role-prompt.schema.json +0 -0
@@ -0,0 +1,209 @@
1
+ ---
2
+ title: "Volatility & Update Cadence Classification"
3
+ description: "Standard for classifying data and configuration update frequency"
4
+ category: "standards"
5
+ status: "stable"
6
+ version: "1.0.0"
7
+ lastUpdated: "2026-01-22"
8
+ maintainer: "3leaps-core"
9
+ reviewers: ["platform", "data-engineering"]
10
+ approvers: ["3leapsdave"]
11
+ tags: ["classification", "volatility", "cadence", "scheduling", "data-lifecycle"]
12
+ content_license: "CC0"
13
+ relatedDocs:
14
+ - "schemas/classifiers/v0/dimension-definition.schema.json"
15
+ - "config/classifiers/dimensions/volatility.dimension.json"
16
+ - "docs/standards/data-sensitivity-classification.md"
17
+ audience: "all"
18
+ ---
19
+
20
+ # Volatility & Update Cadence Classification
21
+
22
+ This standard defines update cadence levels for data and configuration across all 3leaps ecosystems. It provides a consistent framework for:
23
+
24
+ - **Freshness SLAs** - Setting expectations for data currency
25
+ - **Scheduling** - Determining batch job and pipeline frequencies
26
+ - **Partitioning** - Informing time-based partitioning strategies
27
+ - **Caching** - Setting appropriate TTLs and invalidation policies
28
+ - **Resource Planning** - Estimating compute and storage requirements
29
+
30
+ ## Volatility Levels
31
+
32
+ Volatility is an **ordinal** dimension—higher values indicate more frequent updates.
33
+
34
+ | Level | Key | Description | Typical Use Cases |
35
+ | ----- | ----------- | ----------------------- | ------------------------------------------------------- |
36
+ | 0 | `unknown` | Not yet classified | New data sources pending classification |
37
+ | 1 | `static` | No scheduled updates | Reference data, schemas, standards, one-time snapshots |
38
+ | 2 | `monthly` | Roughly monthly batches | Financial reports, compliance audits, capacity planning |
39
+ | 3 | `weekly` | Roughly weekly batches | Product catalogs, pricing updates, aggregated metrics |
40
+ | 4 | `daily` | Daily batches | Transaction summaries, daily snapshots, ETL pipelines |
41
+ | 5 | `hourly` | Sub-daily batches | Operational metrics, near-real-time dashboards, alerts |
42
+ | 6 | `streaming` | Event-driven continuous | Real-time telemetry, live feeds, event sourcing |
43
+
44
+ ---
45
+
46
+ ## Level Details
47
+
48
+ ### Unknown (0)
49
+
50
+ **Volatility not yet classified; must be classified before operational use.**
51
+
52
+ | Aspect | Requirement |
53
+ | ---------------- | ------------------------------------------------------------ |
54
+ | **Scheduling** | Prohibited until classified |
55
+ | **Caching** | Conservative defaults only (no long TTL assumptions) |
56
+ | **Partitioning** | Avoid production partition strategy decisions |
57
+ | **Use Cases** | New data feeds, imported datasets, unprofiled or new sources |
58
+
59
+ **Operational Notes**: Gate operational use on explicit classification. Do not treat missing or unknown volatility as “static” or “daily” by default.
60
+
61
+ ---
62
+
63
+ ### Static (1)
64
+
65
+ **No scheduled updates—one-time or ad-hoc changes only.**
66
+
67
+ - Changes require explicit versioning and release process
68
+ - Safe to cache indefinitely (until version changes)
69
+ - Examples: JSON schemas, role definitions, reference taxonomies
70
+
71
+ **Operational Implications**:
72
+
73
+ - No scheduled refresh jobs needed
74
+ - Version-based cache invalidation
75
+ - Changes go through PR/review process
76
+
77
+ ---
78
+
79
+ ### Monthly (2)
80
+
81
+ **Batch updates roughly monthly.**
82
+
83
+ - Typically aligned with business cycles (month-end close, reporting periods)
84
+ - Allow 24-48 hour processing windows
85
+ - Examples: Financial statements, compliance reports, capacity forecasts
86
+
87
+ **Operational Implications**:
88
+
89
+ - Schedule during low-traffic windows
90
+ - Plan for larger batch sizes
91
+ - Coordinate with downstream consumers on refresh dates
92
+
93
+ ---
94
+
95
+ ### Weekly (3)
96
+
97
+ **Batch updates roughly weekly.**
98
+
99
+ - Common for curated datasets that balance freshness and processing cost
100
+ - Examples: Product catalogs, aggregated analytics, weekly digests
101
+
102
+ **Operational Implications**:
103
+
104
+ - Typical refresh: weekends or early morning
105
+ - Moderate batch sizes
106
+ - Weekly SLA monitoring
107
+
108
+ ---
109
+
110
+ ### Daily (4)
111
+
112
+ **Daily batches—the most common cadence for operational data.**
113
+
114
+ - Standard for transactional summaries and operational reporting
115
+ - Examples: Daily sales, order summaries, log aggregations
116
+
117
+ **Operational Implications**:
118
+
119
+ - Nightly batch windows (typically 00:00-06:00)
120
+ - Date-partitioned storage recommended
121
+ - T+1 data availability expectations
122
+
123
+ ---
124
+
125
+ ### Hourly (5)
126
+
127
+ **Sub-daily batches—hourly or more frequent.**
128
+
129
+ - Bridges gap between batch and streaming
130
+ - Examples: Operational dashboards, alerting thresholds, rate limit counters
131
+
132
+ **Operational Implications**:
133
+
134
+ - Micro-batch processing
135
+ - Hour-partitioned or rolling windows
136
+ - Higher compute costs than daily
137
+ - Consider streaming if approaching minute-level freshness needs
138
+
139
+ ---
140
+
141
+ ### Streaming (6)
142
+
143
+ **Event-driven continuous updates—sub-minute latency.**
144
+
145
+ - True real-time processing
146
+ - Examples: Live telemetry, event sourcing, real-time fraud detection
147
+
148
+ **Operational Implications**:
149
+
150
+ - Requires streaming infrastructure (Kafka, Kinesis, Pulsar)
151
+ - Continuous compute costs
152
+ - Complex exactly-once semantics
153
+ - Backpressure and scaling considerations
154
+
155
+ ---
156
+
157
+ ## Decision Guide
158
+
159
+ ```
160
+ How quickly must consumers see new data?
161
+
162
+ ├── "Whenever we release a new version" → static
163
+ ├── "By the end of the month" → monthly
164
+ ├── "Within a week" → weekly
165
+ ├── "Next business day" → daily
166
+ ├── "Within hours" → hourly
167
+ └── "Immediately / real-time" → streaming
168
+ ```
169
+
170
+ ### Cost-Freshness Tradeoff
171
+
172
+ | Volatility | Relative Cost | Freshness | Complexity |
173
+ | ---------- | ------------- | ------------------- | ---------- |
174
+ | static | Lowest | Stale until release | Simplest |
175
+ | monthly | Low | Up to 30 days | Simple |
176
+ | weekly | Low-Medium | Up to 7 days | Simple |
177
+ | daily | Medium | Up to 24 hours | Moderate |
178
+ | hourly | Medium-High | Up to 1 hour | Moderate |
179
+ | streaming | Highest | Sub-minute | Complex |
180
+
181
+ **Guidance**: Start with the lowest volatility that meets business requirements. Upgrading to higher frequency is easier than optimizing an over-engineered streaming system.
182
+
183
+ ---
184
+
185
+ ## Combining with Other Dimensions
186
+
187
+ Volatility works alongside other classifiers:
188
+
189
+ | Combination | Implication |
190
+ | --------------------------------------------------- | ----------------------------------------------------- |
191
+ | `sensitivity: 4-personal` + `volatility: streaming` | Real-time PII requires streaming encryption and audit |
192
+ | `sensitivity: 0-public` + `volatility: static` | Cacheable forever, CDN-friendly |
193
+ | `volatility: daily` + partitioning | Use date-based partitions |
194
+ | `volatility: streaming` + storage | Consider append-only / event log storage |
195
+
196
+ ---
197
+
198
+ ## Machine-Readable Definition
199
+
200
+ - **Dimension Config**: `config/classifiers/dimensions/volatility.dimension.json`
201
+ - **Schema**: `schemas/classifiers/v0/dimension-definition.schema.json`
202
+
203
+ ---
204
+
205
+ ## Attribution
206
+
207
+ This standard is the canonical reference for volatility classification across 3leaps ecosystems. Downstream consumers should reference or vendor this standard rather than maintaining independent copies.
208
+
209
+ **Review Cycle**: Semiannual with platform and data engineering teams.
@@ -0,0 +1,200 @@
1
+ ---
2
+ title: "Volume Tier Classification"
3
+ description: "Data volume classification standard for scale planning"
4
+ category: "standards"
5
+ status: "stable"
6
+ version: "1.0.0"
7
+ lastUpdated: "2026-01-22"
8
+ maintainer: "3leaps-core"
9
+ reviewers: ["platform", "data-engineering"]
10
+ approvers: ["3leapsdave"]
11
+ tags: ["classification", "volume", "scale", "data-engineering", "partitioning"]
12
+ content_license: "CC0"
13
+ relatedDocs:
14
+ - "docs/standards/velocity-mode-classification.md"
15
+ - "config/classifiers/dimensions/volume-tier.dimension.json"
16
+ audience: "all"
17
+ ---
18
+
19
+ # Volume Tier Classification
20
+
21
+ This standard defines volume tier levels for data across all 3leaps ecosystems. It provides a consistent framework for:
22
+
23
+ - **Scale Planning** - Estimating infrastructure requirements
24
+ - **Partitioning Strategies** - Optimal data organization
25
+ - **File Sizing** - Appropriate chunk sizes for processing
26
+ - **Storage Selection** - Choosing storage systems and formats
27
+ - **Benchmark Profiles** - Performance testing at appropriate scale
28
+
29
+ Volume tier is an **ordinal** dimension—higher values indicate larger scale.
30
+
31
+ ---
32
+
33
+ ## Volume Tiers
34
+
35
+ | Tier | Row Count | Typical Size | Processing Model |
36
+ | ----------- | --------- | ------------ | -------------------------- |
37
+ | **unknown** | Unknown | Unknown | Cannot provision |
38
+ | **tiny** | ≤100K | <100 MB | In-memory, single file |
39
+ | **small** | ≤10M | <10 GB | Single-node |
40
+ | **medium** | ≤1B | <1 TB | Distributed beneficial |
41
+ | **large** | ≤100B | <100 TB | Distributed required |
42
+ | **massive** | >100B | >100 TB | Specialized infrastructure |
43
+
44
+ ---
45
+
46
+ ## Tier Details
47
+
48
+ ### Unknown
49
+
50
+ **Volume not yet classified; must be classified before infrastructure provisioning.**
51
+
52
+ | Aspect | Guidance |
53
+ | ------------------ | ----------------------------------------- |
54
+ | **Processing** | Unknown; cannot provision infrastructure |
55
+ | **Storage Format** | Staging only |
56
+ | **Partitioning** | Cannot determine |
57
+ | **Infrastructure** | Quarantine/staging environment |
58
+ | **Use Cases** | New data feeds, imports pending profiling |
59
+
60
+ **Operational Notes**: Gate infrastructure provisioning decisions on explicit classification. Profile data to determine appropriate tier before production deployment.
61
+
62
+ ---
63
+
64
+ ### Tiny (≤100K rows)
65
+
66
+ **Very small datasets; single-file, in-memory processing.**
67
+
68
+ | Aspect | Guidance |
69
+ | ------------------ | ----------------------------------------- |
70
+ | **Processing** | In-memory (pandas, DuckDB, etc.) |
71
+ | **Storage Format** | CSV, JSON, single Parquet file |
72
+ | **Partitioning** | None needed |
73
+ | **Infrastructure** | Local machine, small container |
74
+ | **Use Cases** | Test fixtures, config data, lookup tables |
75
+
76
+ ---
77
+
78
+ ### Small (≤10M rows)
79
+
80
+ **Small datasets; single-node processing, moderate file sizes.**
81
+
82
+ | Aspect | Guidance |
83
+ | ------------------ | --------------------------------------------- |
84
+ | **Processing** | Single-node (laptop, small VM) |
85
+ | **Storage Format** | Parquet, CSV with compression |
86
+ | **Partitioning** | Optional (by date if time-series) |
87
+ | **Infrastructure** | Standard compute, local SSD |
88
+ | **Use Cases** | Product catalogs, user tables, reference data |
89
+
90
+ ---
91
+
92
+ ### Medium (≤1B rows)
93
+
94
+ **Medium datasets; partitioned storage, distributed processing beneficial.**
95
+
96
+ | Aspect | Guidance |
97
+ | ------------------ | ------------------------------------------ |
98
+ | **Processing** | Distributed beneficial (Spark, Dask) |
99
+ | **Storage Format** | Columnar (Parquet, ORC) required |
100
+ | **Partitioning** | Required (date, key columns) |
101
+ | **Infrastructure** | Cloud data warehouse, distributed compute |
102
+ | **Use Cases** | Transaction history, event logs, analytics |
103
+
104
+ **Optimization Tips**:
105
+
106
+ - Partition by date for time-series data
107
+ - Use predicate pushdown for queries
108
+ - Consider data lake with metadata layer
109
+
110
+ ---
111
+
112
+ ### Large (≤100B rows)
113
+
114
+ **Large datasets; distributed processing required, columnar formats.**
115
+
116
+ | Aspect | Guidance |
117
+ | ------------------ | ---------------------------------------- |
118
+ | **Processing** | Distributed required (Spark, Presto) |
119
+ | **Storage Format** | Columnar with compression (Parquet+Zstd) |
120
+ | **Partitioning** | Multi-level (date + key) |
121
+ | **Infrastructure** | Data lake, distributed compute clusters |
122
+ | **Use Cases** | Telemetry, clickstream, IoT sensors |
123
+
124
+ **Optimization Tips**:
125
+
126
+ - Aggressive partitioning and clustering
127
+ - Z-ordering or data skipping indexes
128
+ - Consider separate hot/warm/cold storage
129
+
130
+ ---
131
+
132
+ ### Massive (>100B rows)
133
+
134
+ **Massive datasets; specialized infrastructure, aggressive partitioning.**
135
+
136
+ | Aspect | Guidance |
137
+ | ------------------ | ------------------------------------------------ |
138
+ | **Processing** | Specialized systems (BigQuery, Redshift, custom) |
139
+ | **Storage Format** | Native formats, custom codecs |
140
+ | **Partitioning** | Heavy (multi-dimension, sharding) |
141
+ | **Infrastructure** | Enterprise data platforms, dedicated clusters |
142
+ | **Use Cases** | Global clickstream, genomics, simulation |
143
+
144
+ **Optimization Tips**:
145
+
146
+ - Work with platform specialists
147
+ - Consider materialized views/aggregates
148
+ - Pre-compute common queries
149
+ - Evaluate specialized databases
150
+
151
+ ---
152
+
153
+ ## Decision Guide
154
+
155
+ ```
156
+ How many rows in your dataset?
157
+
158
+ ├── Thousands (≤100K) → tiny
159
+ ├── Millions (≤10M) → small
160
+ ├── Tens to hundreds of millions (≤1B) → medium
161
+ ├── Billions to tens of billions (≤100B) → large
162
+ └── Hundreds of billions+ → massive
163
+ ```
164
+
165
+ ---
166
+
167
+ ## Infrastructure Recommendations
168
+
169
+ | Volume Tier | Storage System | Compute | Format |
170
+ | ----------- | -------------------------- | ---------------------- | ------------------ |
171
+ | **tiny** | Local FS, S3 single file | Local, small container | CSV, JSON |
172
+ | **small** | S3/GCS, local SSD | Single VM, serverless | Parquet |
173
+ | **medium** | Data lake (Delta, Iceberg) | Spark, serverless SQL | Parquet + metadata |
174
+ | **large** | Data lake, warehouse | Spark cluster, Presto | Parquet + Zstd |
175
+ | **massive** | Enterprise DW, BigQuery | Dedicated clusters | Native formats |
176
+
177
+ ---
178
+
179
+ ## Combining with Other Dimensions
180
+
181
+ | Combination | Implication |
182
+ | --------------------------------------- | ------------------------------------------ |
183
+ | `volume: large` + `velocity: streaming` | Requires streaming infrastructure at scale |
184
+ | `volume: tiny` + `sensitivity: 4` | Small but needs secure handling |
185
+ | `volume: massive` + `retention: long` | Archive storage strategy critical |
186
+
187
+ ---
188
+
189
+ ## Machine-Readable Definition
190
+
191
+ - **Dimension Config**: `config/classifiers/dimensions/volume-tier.dimension.json`
192
+ - **Schema**: `schemas/classifiers/v0/dimension-definition.schema.json`
193
+
194
+ ---
195
+
196
+ ## Attribution
197
+
198
+ This standard is the canonical reference for volume tier classification across 3leaps ecosystems. Downstream consumers should reference or vendor this standard rather than maintaining independent copies.
199
+
200
+ **Review Cycle**: Semiannual with platform and data engineering teams.
@@ -0,0 +1,48 @@
1
+ # AILink Schemas v0
2
+
3
+ Schemas for AI-powered backend integration.
4
+
5
+ **Status**: Unstable (v0) - breaking changes may occur without notice.
6
+
7
+ ## Schemas
8
+
9
+ | Schema | Purpose |
10
+ | ----------------------------- | ------------------------------------------- |
11
+ | `prompt.schema.json` | AILink prompt configuration |
12
+ | `search-response.schema.json` | Base response structure for search/analysis |
13
+
14
+ ## Usage
15
+
16
+ ### Prompt Configuration
17
+
18
+ Prompts use YAML frontmatter validated against `prompt.schema.json`:
19
+
20
+ ```yaml
21
+ slug: my-prompt
22
+ name: My Prompt
23
+ description: What this prompt does
24
+ version: 1.0.0
25
+ input:
26
+ required_variables:
27
+ - query
28
+ tools:
29
+ - type: web_search
30
+ ```
31
+
32
+ The prompt body (after `---`) contains the system template in markdown.
33
+
34
+ ### Response Validation
35
+
36
+ Responses are validated against `search-response.schema.json` or a domain-specific schema that extends it.
37
+
38
+ ## Schema URLs
39
+
40
+ ```
41
+ https://schemas.3leaps.dev/ailink/v0/prompt.schema.json
42
+ https://schemas.3leaps.dev/ailink/v0/search-response.schema.json
43
+ ```
44
+
45
+ ## Related
46
+
47
+ - [namelens/namelens](https://github.com/namelens/namelens) - Reference implementation
48
+ - [FulmenHQ Crucible](https://github.com/fulmenhq/crucible) - Enterprise extensions
@@ -4,12 +4,7 @@
4
4
  "title": "AILink Prompt Configuration",
5
5
  "description": "Schema for AILink prompt configuration files. Prompts define how AI backends process requests, including input variables, tools, and response validation.",
6
6
  "type": "object",
7
- "required": [
8
- "slug",
9
- "name",
10
- "description",
11
- "version"
12
- ],
7
+ "required": ["slug", "name", "description", "version"],
13
8
  "properties": {
14
9
  "slug": {
15
10
  "type": "string",
@@ -74,12 +69,7 @@
74
69
  "type": "array",
75
70
  "items": {
76
71
  "type": "string",
77
- "enum": [
78
- "image/png",
79
- "image/jpeg",
80
- "image/webp",
81
- "image/gif"
82
- ]
72
+ "enum": ["image/png", "image/jpeg", "image/webp", "image/gif"]
83
73
  },
84
74
  "description": "Accepted image MIME types (if accepts_images is true)"
85
75
  },
@@ -125,9 +115,7 @@
125
115
  },
126
116
  {
127
117
  "type": "object",
128
- "required": [
129
- "$ref"
130
- ],
118
+ "required": ["$ref"],
131
119
  "properties": {
132
120
  "$ref": {
133
121
  "type": "string",
@@ -185,9 +173,7 @@
185
173
  "$defs": {
186
174
  "tool": {
187
175
  "type": "object",
188
- "required": [
189
- "type"
190
- ],
176
+ "required": ["type"],
191
177
  "properties": {
192
178
  "type": {
193
179
  "type": "string",
@@ -4,9 +4,7 @@
4
4
  "title": "AILink Search Response",
5
5
  "description": "Base schema for AILink search/analysis response validation. Extensible for domain-specific responses.",
6
6
  "type": "object",
7
- "required": [
8
- "summary"
9
- ],
7
+ "required": ["summary"],
10
8
  "properties": {
11
9
  "summary": {
12
10
  "type": "string",
@@ -15,13 +13,7 @@
15
13
  },
16
14
  "risk_level": {
17
15
  "type": "string",
18
- "enum": [
19
- "low",
20
- "medium",
21
- "high",
22
- "critical",
23
- "unknown"
24
- ],
16
+ "enum": ["low", "medium", "high", "critical", "unknown"],
25
17
  "description": "Overall risk assessment"
26
18
  },
27
19
  "confidence": {
@@ -63,22 +55,11 @@
63
55
  "$defs": {
64
56
  "mention": {
65
57
  "type": "object",
66
- "required": [
67
- "source",
68
- "description"
69
- ],
58
+ "required": ["source", "description"],
70
59
  "properties": {
71
60
  "source": {
72
61
  "type": "string",
73
- "enum": [
74
- "web",
75
- "news",
76
- "social",
77
- "github",
78
- "registry",
79
- "trademark",
80
- "other"
81
- ],
62
+ "enum": ["web", "news", "social", "github", "registry", "trademark", "other"],
82
63
  "description": "Source type of the mention"
83
64
  },
84
65
  "description": {
@@ -92,21 +73,12 @@
92
73
  },
93
74
  "relevance": {
94
75
  "type": "string",
95
- "enum": [
96
- "high",
97
- "medium",
98
- "low"
99
- ],
76
+ "enum": ["high", "medium", "low"],
100
77
  "description": "Relevance to the query"
101
78
  },
102
79
  "sentiment": {
103
80
  "type": "string",
104
- "enum": [
105
- "positive",
106
- "neutral",
107
- "negative",
108
- "mixed"
109
- ],
81
+ "enum": ["positive", "neutral", "negative", "mixed"],
110
82
  "description": "Sentiment of the mention"
111
83
  },
112
84
  "date": {
@@ -119,9 +91,7 @@
119
91
  },
120
92
  "attachment": {
121
93
  "type": "object",
122
- "required": [
123
- "type"
124
- ],
94
+ "required": ["type"],
125
95
  "properties": {
126
96
  "type": {
127
97
  "type": "string",