@fulmenhq/tsfulmen 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +60 -7
- package/config/crucible-ts/agentic/roles/README.md +3 -3
- package/config/crucible-ts/library/fulencode/fixtures/README.md +18 -0
- package/config/crucible-ts/library/fulencode/fixtures/bom/bom.yaml +14 -0
- package/config/crucible-ts/library/fulencode/fixtures/detection/detection.yaml +12 -0
- package/config/crucible-ts/library/fulencode/fixtures/invalid-encodings/base64.yaml +10 -0
- package/config/crucible-ts/library/fulencode/fixtures/normalization/text-safe.yaml +10 -0
- package/config/crucible-ts/library/fulencode/fixtures/telemetry/telemetry-test-cases.yaml +24 -0
- package/config/crucible-ts/library/fulencode/fixtures/valid-encodings/base64.yaml +11 -0
- package/config/crucible-ts/taxonomy/library/platform-modules/v1.0.0/modules.yaml +2 -2
- package/config/crucible-ts/taxonomy/metrics.yaml +79 -1
- package/dist/appidentity/index.d.ts +1 -1
- package/dist/appidentity/index.js +122 -56
- package/dist/appidentity/index.js.map +1 -1
- package/dist/config/index.js +122 -56
- package/dist/config/index.js.map +1 -1
- package/dist/crucible/index.js +122 -56
- package/dist/crucible/index.js.map +1 -1
- package/dist/errors/index.d.ts +1 -1
- package/dist/errors/index.js +122 -56
- package/dist/errors/index.js.map +1 -1
- package/dist/foundry/index.js +122 -56
- package/dist/foundry/index.js.map +1 -1
- package/dist/fulencode/index.d.ts +102 -0
- package/dist/fulencode/index.js +806 -0
- package/dist/fulencode/index.js.map +1 -0
- package/dist/index.d.ts +2 -2
- package/dist/index.js +123 -57
- package/dist/index.js.map +1 -1
- package/dist/pathfinder/index.d.ts +1 -1
- package/dist/pathfinder/index.js +122 -56
- package/dist/pathfinder/index.js.map +1 -1
- package/dist/reports/license-inventory.csv +7 -6
- package/dist/schema/index.d.ts +2 -2
- package/dist/schema/index.js +122 -56
- package/dist/schema/index.js.map +1 -1
- package/dist/signals/index.js +122 -56
- package/dist/signals/index.js.map +1 -1
- package/dist/telemetry/http/index.js +122 -56
- package/dist/telemetry/http/index.js.map +1 -1
- package/dist/telemetry/index.d.ts +1 -1
- package/dist/telemetry/index.js +122 -56
- package/dist/telemetry/index.js.map +1 -1
- package/dist/telemetry/prometheus/index.d.ts +1 -1
- package/dist/telemetry/prometheus/index.js +122 -56
- package/dist/telemetry/prometheus/index.js.map +1 -1
- package/dist/{types-BJswWpQC.d.ts → types-DdoeE7F5.d.ts} +1 -1
- package/package.json +7 -2
- package/schemas/crucible-ts/library/fulencode/v1.0.0/README.md +37 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/bom-result.schema.json +48 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/decode-options.schema.json +60 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/decoding-result.schema.json +70 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/detect-options.schema.json +25 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/detection-result.schema.json +57 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/encode-options.schema.json +71 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/encoding-result.schema.json +57 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/fulencode-config.schema.json +8 -4
- package/schemas/crucible-ts/library/fulencode/v1.0.0/fulencode-error.schema.json +66 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/normalization-result.schema.json +73 -0
- package/schemas/crucible-ts/library/fulencode/v1.0.0/normalize-options.schema.json +44 -0
- package/schemas/crucible-ts/meta/README.md +38 -2
- package/schemas/crucible-ts/meta/draft-04/schema.json +222 -0
- package/schemas/crucible-ts/meta/draft-06/schema.json +218 -0
- package/schemas/crucible-ts/meta/draft-2019-09/meta/applicator.json +93 -0
- package/schemas/crucible-ts/meta/draft-2019-09/meta/content.json +21 -0
- package/schemas/crucible-ts/meta/draft-2019-09/meta/core.json +58 -0
- package/schemas/crucible-ts/meta/draft-2019-09/meta/format.json +15 -0
- package/schemas/crucible-ts/meta/draft-2019-09/meta/meta-data.json +35 -0
- package/schemas/crucible-ts/meta/draft-2019-09/meta/validation.json +119 -0
- package/schemas/crucible-ts/meta/draft-2019-09/offline.schema.json +148 -0
- package/schemas/crucible-ts/meta/draft-2019-09/schema.json +62 -0
- package/schemas/crucible-ts/meta/fixtures/draft-04-sample.json +16 -0
- package/schemas/crucible-ts/meta/fixtures/draft-06-sample.json +16 -0
- package/schemas/crucible-ts/meta/fixtures/draft-07-sample.json +34 -0
- package/schemas/crucible-ts/meta/fixtures/draft-2019-09-sample.json +21 -0
- package/schemas/crucible-ts/meta/fixtures/draft-2020-12-sample.json +21 -0
- package/schemas/crucible-ts/taxonomy/library/fulencode/normalization-profiles/v1.0.0/profiles.yaml +16 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/PROVENANCE.md +64 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/access-tier.dimension.json +103 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/retention-lifecycle.dimension.json +103 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/schema-stability.dimension.json +100 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/sensitivity.dimension.json +130 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/velocity-mode.dimension.json +79 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/volatility.dimension.json +72 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/config/classifiers/dimensions/volume-tier.dimension.json +66 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/docs/catalog/classifiers/README.md +29 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/access-tier-classification.md +163 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/classifiers-framework.md +157 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/data-sensitivity-classification.md +259 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/retention-lifecycle-classification.md +200 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/schema-stability-classification.md +205 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/velocity-mode-classification.md +222 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/volatility-classification.md +209 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/volume-tier-classification.md +200 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/ailink/v0/README.md +48 -0
- package/schemas/crucible-ts/upstream/3leaps/{ailink → crucible/schemas/ailink}/v0/prompt.schema.json +4 -18
- package/schemas/crucible-ts/upstream/3leaps/{ailink → crucible/schemas/ailink}/v0/search-response.schema.json +7 -37
- package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/classifiers/v0/dimension-definition.schema.json +247 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/classifiers/v0/sensitivity-level.schema.json +67 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/error-response.schema.json +59 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/lifecycle-phases.data.json +102 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/lifecycle-phases.schema.json +101 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/release-phase.schema.json +18 -0
- package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/foundation/v0/types.schema.json +177 -0
- package/schemas/crucible-ts/upstream/3leaps/PROVENANCE.md +0 -43
- package/schemas/crucible-ts/upstream/3leaps/{agentic → crucible/schemas/agentic}/v0/role-prompt.schema.json +0 -0
package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/volatility-classification.md
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Volatility & Update Cadence Classification"
|
|
3
|
+
description: "Standard for classifying data and configuration update frequency"
|
|
4
|
+
category: "standards"
|
|
5
|
+
status: "stable"
|
|
6
|
+
version: "1.0.0"
|
|
7
|
+
lastUpdated: "2026-01-22"
|
|
8
|
+
maintainer: "3leaps-core"
|
|
9
|
+
reviewers: ["platform", "data-engineering"]
|
|
10
|
+
approvers: ["3leapsdave"]
|
|
11
|
+
tags: ["classification", "volatility", "cadence", "scheduling", "data-lifecycle"]
|
|
12
|
+
content_license: "CC0"
|
|
13
|
+
relatedDocs:
|
|
14
|
+
- "schemas/classifiers/v0/dimension-definition.schema.json"
|
|
15
|
+
- "config/classifiers/dimensions/volatility.dimension.json"
|
|
16
|
+
- "docs/standards/data-sensitivity-classification.md"
|
|
17
|
+
audience: "all"
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
# Volatility & Update Cadence Classification
|
|
21
|
+
|
|
22
|
+
This standard defines update cadence levels for data and configuration across all 3leaps ecosystems. It provides a consistent framework for:
|
|
23
|
+
|
|
24
|
+
- **Freshness SLAs** - Setting expectations for data currency
|
|
25
|
+
- **Scheduling** - Determining batch job and pipeline frequencies
|
|
26
|
+
- **Partitioning** - Informing time-based partitioning strategies
|
|
27
|
+
- **Caching** - Setting appropriate TTLs and invalidation policies
|
|
28
|
+
- **Resource Planning** - Estimating compute and storage requirements
|
|
29
|
+
|
|
30
|
+
## Volatility Levels
|
|
31
|
+
|
|
32
|
+
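Volatility is an **ordinal** dimension—higher values indicate more frequent updates. Level 0 (`unknown`) is the exception: it marks data that has not been classified yet, not a cadence slower than `static`.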
|
|
33
|
+
|
|
34
|
+
| Level | Key | Description | Typical Use Cases |
|
|
35
|
+
| ----- | ----------- | ----------------------- | ------------------------------------------------------- |
|
|
36
|
+
| 0 | `unknown` | Not yet classified | New data sources pending classification |
|
|
37
|
+
| 1 | `static` | No scheduled updates | Reference data, schemas, standards, one-time snapshots |
|
|
38
|
+
| 2 | `monthly` | Roughly monthly batches | Financial reports, compliance audits, capacity planning |
|
|
39
|
+
| 3 | `weekly` | Roughly weekly batches | Product catalogs, pricing updates, aggregated metrics |
|
|
40
|
+
| 4 | `daily` | Daily batches | Transaction summaries, daily snapshots, ETL pipelines |
|
|
41
|
+
| 5 | `hourly` | Sub-daily batches | Operational metrics, near-real-time dashboards, alerts |
|
|
42
|
+
| 6 | `streaming` | Event-driven continuous | Real-time telemetry, live feeds, event sourcing |
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Level Details
|
|
47
|
+
|
|
48
|
+
### Unknown (0)
|
|
49
|
+
|
|
50
|
+
**Volatility not yet classified; must be classified before operational use.**
|
|
51
|
+
|
|
52
|
+
| Aspect | Requirement |
|
|
53
|
+
| ---------------- | ------------------------------------------------------------ |
|
|
54
|
+
| **Scheduling** | Prohibited until classified |
|
|
55
|
+
| **Caching** | Conservative defaults only (no long TTL assumptions) |
|
|
56
|
+
| **Partitioning** | Avoid production partition strategy decisions |
|
|
57
|
+
| **Use Cases** | New data feeds, imported datasets, unprofiled or new sources |
|
|
58
|
+
|
|
59
|
+
**Operational Notes**: Gate operational use on explicit classification. Do not treat missing or unknown volatility as “static” or “daily” by default.
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
### Static (1)
|
|
64
|
+
|
|
65
|
+
**No scheduled updates—one-time or ad-hoc changes only.**
|
|
66
|
+
|
|
67
|
+
- Changes require explicit versioning and release process
|
|
68
|
+
- Safe to cache indefinitely (until version changes)
|
|
69
|
+
- Examples: JSON schemas, role definitions, reference taxonomies
|
|
70
|
+
|
|
71
|
+
**Operational Implications**:
|
|
72
|
+
|
|
73
|
+
- No scheduled refresh jobs needed
|
|
74
|
+
- Version-based cache invalidation
|
|
75
|
+
- Changes go through PR/review process
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
### Monthly (2)
|
|
80
|
+
|
|
81
|
+
**Batch updates roughly monthly.**
|
|
82
|
+
|
|
83
|
+
- Typically aligned with business cycles (month-end close, reporting periods)
|
|
84
|
+
- Allow 24-48 hour processing windows
|
|
85
|
+
- Examples: Financial statements, compliance reports, capacity forecasts
|
|
86
|
+
|
|
87
|
+
**Operational Implications**:
|
|
88
|
+
|
|
89
|
+
- Schedule during low-traffic windows
|
|
90
|
+
- Plan for larger batch sizes
|
|
91
|
+
- Coordinate with downstream consumers on refresh dates
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
### Weekly (3)
|
|
96
|
+
|
|
97
|
+
**Batch updates roughly weekly.**
|
|
98
|
+
|
|
99
|
+
- Common for curated datasets that balance freshness and processing cost
|
|
100
|
+
- Examples: Product catalogs, aggregated analytics, weekly digests
|
|
101
|
+
|
|
102
|
+
**Operational Implications**:
|
|
103
|
+
|
|
104
|
+
- Typical refresh: weekends or early morning
|
|
105
|
+
- Moderate batch sizes
|
|
106
|
+
- Weekly SLA monitoring
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
### Daily (4)
|
|
111
|
+
|
|
112
|
+
**Daily batches—the most common cadence for operational data.**
|
|
113
|
+
|
|
114
|
+
- Standard for transactional summaries and operational reporting
|
|
115
|
+
- Examples: Daily sales, order summaries, log aggregations
|
|
116
|
+
|
|
117
|
+
**Operational Implications**:
|
|
118
|
+
|
|
119
|
+
- Nightly batch windows (typically 00:00-06:00)
|
|
120
|
+
- Date-partitioned storage recommended
|
|
121
|
+
- T+1 data availability expectations
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
### Hourly (5)
|
|
126
|
+
|
|
127
|
+
**Sub-daily batches—hourly or more frequent.**
|
|
128
|
+
|
|
129
|
+
- Bridges gap between batch and streaming
|
|
130
|
+
- Examples: Operational dashboards, alerting thresholds, rate limit counters
|
|
131
|
+
|
|
132
|
+
**Operational Implications**:
|
|
133
|
+
|
|
134
|
+
- Micro-batch processing
|
|
135
|
+
- Hour-partitioned or rolling windows
|
|
136
|
+
- Higher compute costs than daily
|
|
137
|
+
- Consider streaming if approaching minute-level freshness needs
|
|
138
|
+
|
|
139
|
+
---
|
|
140
|
+
|
|
141
|
+
### Streaming (6)
|
|
142
|
+
|
|
143
|
+
**Event-driven continuous updates—sub-minute latency.**
|
|
144
|
+
|
|
145
|
+
- True real-time processing
|
|
146
|
+
- Examples: Live telemetry, event sourcing, real-time fraud detection
|
|
147
|
+
|
|
148
|
+
**Operational Implications**:
|
|
149
|
+
|
|
150
|
+
- Requires streaming infrastructure (Kafka, Kinesis, Pulsar)
|
|
151
|
+
- Continuous compute costs
|
|
152
|
+
- Complex exactly-once semantics
|
|
153
|
+
- Backpressure and scaling considerations
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## Decision Guide
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
How quickly must consumers see new data?
|
|
161
|
+
|
|
162
|
+
├── "Whenever we release a new version" → static
|
|
163
|
+
├── "By the end of the month" → monthly
|
|
164
|
+
├── "Within a week" → weekly
|
|
165
|
+
├── "Next business day" → daily
|
|
166
|
+
├── "Within hours" → hourly
|
|
167
|
+
└── "Immediately / real-time" → streaming
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Cost-Freshness Tradeoff
|
|
171
|
+
|
|
172
|
+
| Volatility | Relative Cost | Freshness | Complexity |
|
|
173
|
+
| ---------- | ------------- | ------------------- | ---------- |
|
|
174
|
+
| static | Lowest | Stale until release | Simplest |
|
|
175
|
+
| monthly | Low | Up to 30 days | Simple |
|
|
176
|
+
| weekly | Low-Medium | Up to 7 days | Simple |
|
|
177
|
+
| daily | Medium | Up to 24 hours | Moderate |
|
|
178
|
+
| hourly | Medium-High | Up to 1 hour | Moderate |
|
|
179
|
+
| streaming | Highest | Sub-minute | Complex |
|
|
180
|
+
|
|
181
|
+
**Guidance**: Start with the lowest volatility that meets business requirements. Moving to a higher-frequency level later is easier than scaling back an over-engineered streaming system.
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Combining with Other Dimensions
|
|
186
|
+
|
|
187
|
+
Volatility works alongside other classifiers:
|
|
188
|
+
|
|
189
|
+
| Combination | Implication |
|
|
190
|
+
| --------------------------------------------------- | ----------------------------------------------------- |
|
|
191
|
+
| `sensitivity: 4-personal` + `volatility: streaming` | Real-time PII requires streaming encryption and audit |
|
|
192
|
+
| `sensitivity: 0-public` + `volatility: static` | Cacheable forever, CDN-friendly |
|
|
193
|
+
| `volatility: daily` + partitioning | Use date-based partitions |
|
|
194
|
+
| `volatility: streaming` + storage | Consider append-only / event log storage |
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
## Machine-Readable Definition
|
|
199
|
+
|
|
200
|
+
- **Dimension Config**: `config/classifiers/dimensions/volatility.dimension.json`
|
|
201
|
+
- **Schema**: `schemas/classifiers/v0/dimension-definition.schema.json`
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## Attribution
|
|
206
|
+
|
|
207
|
+
This standard is the canonical reference for volatility classification across 3leaps ecosystems. Downstream consumers should reference or vendor this standard rather than maintaining independent copies.
|
|
208
|
+
|
|
209
|
+
**Review Cycle**: Semiannual with platform and data engineering teams.
|
package/schemas/crucible-ts/upstream/3leaps/crucible/docs/standards/volume-tier-classification.md
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Volume Tier Classification"
|
|
3
|
+
description: "Data volume classification standard for scale planning"
|
|
4
|
+
category: "standards"
|
|
5
|
+
status: "stable"
|
|
6
|
+
version: "1.0.0"
|
|
7
|
+
lastUpdated: "2026-01-22"
|
|
8
|
+
maintainer: "3leaps-core"
|
|
9
|
+
reviewers: ["platform", "data-engineering"]
|
|
10
|
+
approvers: ["3leapsdave"]
|
|
11
|
+
tags: ["classification", "volume", "scale", "data-engineering", "partitioning"]
|
|
12
|
+
content_license: "CC0"
|
|
13
|
+
relatedDocs:
|
|
14
|
+
- "docs/standards/velocity-mode-classification.md"
|
|
15
|
+
- "config/classifiers/dimensions/volume-tier.dimension.json"
|
|
16
|
+
audience: "all"
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
# Volume Tier Classification
|
|
20
|
+
|
|
21
|
+
This standard defines volume tier levels for data across all 3leaps ecosystems. It provides a consistent framework for:
|
|
22
|
+
|
|
23
|
+
- **Scale Planning** - Estimating infrastructure requirements
|
|
24
|
+
- **Partitioning Strategies** - Optimal data organization
|
|
25
|
+
- **File Sizing** - Appropriate chunk sizes for processing
|
|
26
|
+
- **Storage Selection** - Choosing storage systems and formats
|
|
27
|
+
- **Benchmark Profiles** - Performance testing at appropriate scale
|
|
28
|
+
|
|
29
|
+
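Volume tier is an **ordinal** dimension—higher values indicate larger scale (`tiny` < `small` < `medium` < `large` < `massive`). The `unknown` tier marks data that has not been classified yet.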
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Volume Tiers
|
|
34
|
+
|
|
35
|
+
| Tier | Row Count | Typical Size | Processing Model |
|
|
36
|
+
| ----------- | --------- | ------------ | -------------------------- |
|
|
37
|
+
| **unknown** | Unknown | Unknown | Cannot provision |
|
|
38
|
+
| **tiny** | ≤100K | <100 MB | In-memory, single file |
|
|
39
|
+
| **small** | ≤10M | <10 GB | Single-node |
|
|
40
|
+
| **medium** | ≤1B | <1 TB | Distributed beneficial |
|
|
41
|
+
| **large** | ≤100B | <100 TB | Distributed required |
|
|
42
|
+
| **massive** | >100B | ≥100 TB | Specialized infrastructure |
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Tier Details
|
|
47
|
+
|
|
48
|
+
### Unknown
|
|
49
|
+
|
|
50
|
+
**Volume not yet classified; must be classified before infrastructure provisioning.**
|
|
51
|
+
|
|
52
|
+
| Aspect | Guidance |
|
|
53
|
+
| ------------------ | ----------------------------------------- |
|
|
54
|
+
| **Processing** | Unknown; cannot provision infrastructure |
|
|
55
|
+
| **Storage Format** | Staging only |
|
|
56
|
+
| **Partitioning** | Cannot determine |
|
|
57
|
+
| **Infrastructure** | Quarantine/staging environment |
|
|
58
|
+
| **Use Cases** | New data feeds, imports pending profiling |
|
|
59
|
+
|
|
60
|
+
**Operational Notes**: Gate infrastructure provisioning decisions on explicit classification. Profile data to determine appropriate tier before production deployment.
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
### Tiny (≤100K rows)
|
|
65
|
+
|
|
66
|
+
**Very small datasets; single-file, in-memory processing.**
|
|
67
|
+
|
|
68
|
+
| Aspect | Guidance |
|
|
69
|
+
| ------------------ | ----------------------------------------- |
|
|
70
|
+
| **Processing** | In-memory (pandas, DuckDB, etc.) |
|
|
71
|
+
| **Storage Format** | CSV, JSON, single Parquet file |
|
|
72
|
+
| **Partitioning** | None needed |
|
|
73
|
+
| **Infrastructure** | Local machine, small container |
|
|
74
|
+
| **Use Cases** | Test fixtures, config data, lookup tables |
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
### Small (≤10M rows)
|
|
79
|
+
|
|
80
|
+
**Small datasets; single-node processing, moderate file sizes.**
|
|
81
|
+
|
|
82
|
+
| Aspect | Guidance |
|
|
83
|
+
| ------------------ | --------------------------------------------- |
|
|
84
|
+
| **Processing** | Single-node (laptop, small VM) |
|
|
85
|
+
| **Storage Format** | Parquet, CSV with compression |
|
|
86
|
+
| **Partitioning** | Optional (by date if time-series) |
|
|
87
|
+
| **Infrastructure** | Standard compute, local SSD |
|
|
88
|
+
| **Use Cases** | Product catalogs, user tables, reference data |
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
### Medium (≤1B rows)
|
|
93
|
+
|
|
94
|
+
**Medium datasets; partitioned storage, distributed processing beneficial.**
|
|
95
|
+
|
|
96
|
+
| Aspect | Guidance |
|
|
97
|
+
| ------------------ | ------------------------------------------ |
|
|
98
|
+
| **Processing** | Distributed beneficial (Spark, Dask) |
|
|
99
|
+
| **Storage Format** | Columnar (Parquet, ORC) required |
|
|
100
|
+
| **Partitioning** | Required (date, key columns) |
|
|
101
|
+
| **Infrastructure** | Cloud data warehouse, distributed compute |
|
|
102
|
+
| **Use Cases** | Transaction history, event logs, analytics |
|
|
103
|
+
|
|
104
|
+
**Optimization Tips**:
|
|
105
|
+
|
|
106
|
+
- Partition by date for time-series data
|
|
107
|
+
- Use predicate pushdown for queries
|
|
108
|
+
- Consider data lake with metadata layer
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
### Large (≤100B rows)
|
|
113
|
+
|
|
114
|
+
**Large datasets; distributed processing required, columnar formats.**
|
|
115
|
+
|
|
116
|
+
| Aspect | Guidance |
|
|
117
|
+
| ------------------ | ---------------------------------------- |
|
|
118
|
+
| **Processing** | Distributed required (Spark, Presto) |
|
|
119
|
+
| **Storage Format** | Columnar with compression (Parquet+Zstd) |
|
|
120
|
+
| **Partitioning** | Multi-level (date + key) |
|
|
121
|
+
| **Infrastructure** | Data lake, distributed compute clusters |
|
|
122
|
+
| **Use Cases** | Telemetry, clickstream, IoT sensors |
|
|
123
|
+
|
|
124
|
+
**Optimization Tips**:
|
|
125
|
+
|
|
126
|
+
- Aggressive partitioning and clustering
|
|
127
|
+
- Z-ordering or data skipping indexes
|
|
128
|
+
- Consider separate hot/warm/cold storage
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
### Massive (>100B rows)
|
|
133
|
+
|
|
134
|
+
**Massive datasets; specialized infrastructure, aggressive partitioning.**
|
|
135
|
+
|
|
136
|
+
| Aspect | Guidance |
|
|
137
|
+
| ------------------ | ------------------------------------------------ |
|
|
138
|
+
| **Processing** | Specialized systems (BigQuery, Redshift, custom) |
|
|
139
|
+
| **Storage Format** | Native formats, custom codecs |
|
|
140
|
+
| **Partitioning** | Heavy (multi-dimension, sharding) |
|
|
141
|
+
| **Infrastructure** | Enterprise data platforms, dedicated clusters |
|
|
142
|
+
| **Use Cases** | Global clickstream, genomics, simulation |
|
|
143
|
+
|
|
144
|
+
**Optimization Tips**:
|
|
145
|
+
|
|
146
|
+
- Work with platform specialists
|
|
147
|
+
- Consider materialized views/aggregates
|
|
148
|
+
- Pre-compute common queries
|
|
149
|
+
- Evaluate specialized databases
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## Decision Guide
|
|
154
|
+
|
|
155
|
+
```
|
|
156
|
+
How many rows in your dataset?
|
|
157
|
+
|
|
158
|
+
├── Thousands (≤100K) → tiny
|
|
159
|
+
├── Millions (≤10M) → small
|
|
160
|
+
├── Hundreds of millions (≤1B) → medium
|
|
161
|
+
├── Tens of billions (≤100B) → large
|
|
162
|
+
└── Hundreds of billions+ → massive
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Infrastructure Recommendations
|
|
168
|
+
|
|
169
|
+
| Volume Tier | Storage System | Compute | Format |
|
|
170
|
+
| ----------- | -------------------------- | ---------------------- | ------------------ |
|
|
171
|
+
| **tiny** | Local FS, S3 single file | Local, small container | CSV, JSON |
|
|
172
|
+
| **small** | S3/GCS, local SSD | Single VM, serverless | Parquet |
|
|
173
|
+
| **medium** | Data lake (Delta, Iceberg) | Spark, serverless SQL | Parquet + metadata |
|
|
174
|
+
| **large** | Data lake, warehouse | Spark cluster, Presto | Parquet + Zstd |
|
|
175
|
+
| **massive** | Enterprise DW, BigQuery | Dedicated clusters | Native formats |
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## Combining with Other Dimensions
|
|
180
|
+
|
|
181
|
+
| Combination | Implication |
|
|
182
|
+
| --------------------------------------- | ------------------------------------------ |
|
|
183
|
+
| `volume: large` + `velocity: streaming` | Requires streaming infrastructure at scale |
|
|
184
|
+
| `volume: tiny` + `sensitivity: 4-personal` | Small but needs secure handling |
|
|
185
|
+
| `volume: massive` + `retention: long` | Archive storage strategy critical |
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Machine-Readable Definition
|
|
190
|
+
|
|
191
|
+
- **Dimension Config**: `config/classifiers/dimensions/volume-tier.dimension.json`
|
|
192
|
+
- **Schema**: `schemas/classifiers/v0/dimension-definition.schema.json`
|
|
193
|
+
|
|
194
|
+
---
|
|
195
|
+
|
|
196
|
+
## Attribution
|
|
197
|
+
|
|
198
|
+
This standard is the canonical reference for volume tier classification across 3leaps ecosystems. Downstream consumers should reference or vendor this standard rather than maintaining independent copies.
|
|
199
|
+
|
|
200
|
+
**Review Cycle**: Semiannual with platform and data engineering teams.
|
package/schemas/crucible-ts/upstream/3leaps/crucible/schemas/ailink/v0/README.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# AILink Schemas v0
|
|
2
|
+
|
|
3
|
+
Schemas for AI-powered backend integration.
|
|
4
|
+
|
|
5
|
+
**Status**: Unstable (v0) - breaking changes may occur without notice.
|
|
6
|
+
|
|
7
|
+
## Schemas
|
|
8
|
+
|
|
9
|
+
| Schema | Purpose |
|
|
10
|
+
| ----------------------------- | ------------------------------------------- |
|
|
11
|
+
| `prompt.schema.json` | AILink prompt configuration |
|
|
12
|
+
| `search-response.schema.json` | Base response structure for search/analysis |
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
### Prompt Configuration
|
|
17
|
+
|
|
18
|
+
Prompts use YAML frontmatter validated against `prompt.schema.json`:
|
|
19
|
+
|
|
20
|
+
```yaml
|
|
21
|
+
slug: my-prompt
|
|
22
|
+
name: My Prompt
|
|
23
|
+
description: What this prompt does
|
|
24
|
+
version: 1.0.0
|
|
25
|
+
input:
|
|
26
|
+
required_variables:
|
|
27
|
+
- query
|
|
28
|
+
tools:
|
|
29
|
+
- type: web_search
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
The prompt body (everything after the closing `---` delimiter of the YAML frontmatter) contains the system template in Markdown.
|
|
33
|
+
|
|
34
|
+
### Response Validation
|
|
35
|
+
|
|
36
|
+
Responses are validated against `search-response.schema.json` or a domain-specific schema that extends it.
|
|
37
|
+
|
|
38
|
+
## Schema URLs
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
https://schemas.3leaps.dev/ailink/v0/prompt.schema.json
|
|
42
|
+
https://schemas.3leaps.dev/ailink/v0/search-response.schema.json
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Related
|
|
46
|
+
|
|
47
|
+
- [namelens/namelens](https://github.com/namelens/namelens) - Reference implementation
|
|
48
|
+
- [FulmenHQ Crucible](https://github.com/fulmenhq/crucible) - Enterprise extensions
|
package/schemas/crucible-ts/upstream/3leaps/{ailink → crucible/schemas/ailink}/v0/prompt.schema.json
RENAMED
|
@@ -4,12 +4,7 @@
|
|
|
4
4
|
"title": "AILink Prompt Configuration",
|
|
5
5
|
"description": "Schema for AILink prompt configuration files. Prompts define how AI backends process requests, including input variables, tools, and response validation.",
|
|
6
6
|
"type": "object",
|
|
7
|
-
"required": [
|
|
8
|
-
"slug",
|
|
9
|
-
"name",
|
|
10
|
-
"description",
|
|
11
|
-
"version"
|
|
12
|
-
],
|
|
7
|
+
"required": ["slug", "name", "description", "version"],
|
|
13
8
|
"properties": {
|
|
14
9
|
"slug": {
|
|
15
10
|
"type": "string",
|
|
@@ -74,12 +69,7 @@
|
|
|
74
69
|
"type": "array",
|
|
75
70
|
"items": {
|
|
76
71
|
"type": "string",
|
|
77
|
-
"enum": [
|
|
78
|
-
"image/png",
|
|
79
|
-
"image/jpeg",
|
|
80
|
-
"image/webp",
|
|
81
|
-
"image/gif"
|
|
82
|
-
]
|
|
72
|
+
"enum": ["image/png", "image/jpeg", "image/webp", "image/gif"]
|
|
83
73
|
},
|
|
84
74
|
"description": "Accepted image MIME types (if accepts_images is true)"
|
|
85
75
|
},
|
|
@@ -125,9 +115,7 @@
|
|
|
125
115
|
},
|
|
126
116
|
{
|
|
127
117
|
"type": "object",
|
|
128
|
-
"required": [
|
|
129
|
-
"$ref"
|
|
130
|
-
],
|
|
118
|
+
"required": ["$ref"],
|
|
131
119
|
"properties": {
|
|
132
120
|
"$ref": {
|
|
133
121
|
"type": "string",
|
|
@@ -185,9 +173,7 @@
|
|
|
185
173
|
"$defs": {
|
|
186
174
|
"tool": {
|
|
187
175
|
"type": "object",
|
|
188
|
-
"required": [
|
|
189
|
-
"type"
|
|
190
|
-
],
|
|
176
|
+
"required": ["type"],
|
|
191
177
|
"properties": {
|
|
192
178
|
"type": {
|
|
193
179
|
"type": "string",
|
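package/schemas/crucible-ts/upstream/3leaps/{ailink → crucible/schemas/ailink}/v0/search-response.schema.json
RENAMED
|
@@ -4,9 +4,7 @@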
|
|
|
4
4
|
"title": "AILink Search Response",
|
|
5
5
|
"description": "Base schema for AILink search/analysis response validation. Extensible for domain-specific responses.",
|
|
6
6
|
"type": "object",
|
|
7
|
-
"required": [
|
|
8
|
-
"summary"
|
|
9
|
-
],
|
|
7
|
+
"required": ["summary"],
|
|
10
8
|
"properties": {
|
|
11
9
|
"summary": {
|
|
12
10
|
"type": "string",
|
|
@@ -15,13 +13,7 @@
|
|
|
15
13
|
},
|
|
16
14
|
"risk_level": {
|
|
17
15
|
"type": "string",
|
|
18
|
-
"enum": [
|
|
19
|
-
"low",
|
|
20
|
-
"medium",
|
|
21
|
-
"high",
|
|
22
|
-
"critical",
|
|
23
|
-
"unknown"
|
|
24
|
-
],
|
|
16
|
+
"enum": ["low", "medium", "high", "critical", "unknown"],
|
|
25
17
|
"description": "Overall risk assessment"
|
|
26
18
|
},
|
|
27
19
|
"confidence": {
|
|
@@ -63,22 +55,11 @@
|
|
|
63
55
|
"$defs": {
|
|
64
56
|
"mention": {
|
|
65
57
|
"type": "object",
|
|
66
|
-
"required": [
|
|
67
|
-
"source",
|
|
68
|
-
"description"
|
|
69
|
-
],
|
|
58
|
+
"required": ["source", "description"],
|
|
70
59
|
"properties": {
|
|
71
60
|
"source": {
|
|
72
61
|
"type": "string",
|
|
73
|
-
"enum": [
|
|
74
|
-
"web",
|
|
75
|
-
"news",
|
|
76
|
-
"social",
|
|
77
|
-
"github",
|
|
78
|
-
"registry",
|
|
79
|
-
"trademark",
|
|
80
|
-
"other"
|
|
81
|
-
],
|
|
62
|
+
"enum": ["web", "news", "social", "github", "registry", "trademark", "other"],
|
|
82
63
|
"description": "Source type of the mention"
|
|
83
64
|
},
|
|
84
65
|
"description": {
|
|
@@ -92,21 +73,12 @@
|
|
|
92
73
|
},
|
|
93
74
|
"relevance": {
|
|
94
75
|
"type": "string",
|
|
95
|
-
"enum": [
|
|
96
|
-
"high",
|
|
97
|
-
"medium",
|
|
98
|
-
"low"
|
|
99
|
-
],
|
|
76
|
+
"enum": ["high", "medium", "low"],
|
|
100
77
|
"description": "Relevance to the query"
|
|
101
78
|
},
|
|
102
79
|
"sentiment": {
|
|
103
80
|
"type": "string",
|
|
104
|
-
"enum": [
|
|
105
|
-
"positive",
|
|
106
|
-
"neutral",
|
|
107
|
-
"negative",
|
|
108
|
-
"mixed"
|
|
109
|
-
],
|
|
81
|
+
"enum": ["positive", "neutral", "negative", "mixed"],
|
|
110
82
|
"description": "Sentiment of the mention"
|
|
111
83
|
},
|
|
112
84
|
"date": {
|
|
@@ -119,9 +91,7 @@
|
|
|
119
91
|
},
|
|
120
92
|
"attachment": {
|
|
121
93
|
"type": "object",
|
|
122
|
-
"required": [
|
|
123
|
-
"type"
|
|
124
|
-
],
|
|
94
|
+
"required": ["type"],
|
|
125
95
|
"properties": {
|
|
126
96
|
"type": {
|
|
127
97
|
"type": "string",
|