@chkit/plugin-backfill 0.1.0-beta.2 → 0.1.0-beta.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164) hide show
  1. package/README.md +170 -0
  2. package/dist/args.d.ts +109 -6
  3. package/dist/args.d.ts.map +1 -1
  4. package/dist/args.js +73 -97
  5. package/dist/args.js.map +1 -1
  6. package/dist/async-backfill.d.ts +64 -0
  7. package/dist/async-backfill.d.ts.map +1 -0
  8. package/dist/async-backfill.js +251 -0
  9. package/dist/async-backfill.js.map +1 -0
  10. package/dist/check.d.ts +9 -0
  11. package/dist/check.d.ts.map +1 -0
  12. package/dist/check.js +79 -0
  13. package/dist/check.js.map +1 -0
  14. package/dist/chunking/analyze.d.ts +8 -0
  15. package/dist/chunking/analyze.d.ts.map +1 -0
  16. package/dist/chunking/analyze.js +8 -0
  17. package/dist/chunking/analyze.js.map +1 -0
  18. package/dist/chunking/boundary-codec.d.ts +10 -0
  19. package/dist/chunking/boundary-codec.d.ts.map +1 -0
  20. package/dist/chunking/boundary-codec.js +79 -0
  21. package/dist/chunking/boundary-codec.js.map +1 -0
  22. package/dist/chunking/build.d.ts +11 -0
  23. package/dist/chunking/build.d.ts.map +1 -0
  24. package/dist/chunking/build.js +51 -0
  25. package/dist/chunking/build.js.map +1 -0
  26. package/dist/chunking/e2e/constants.d.ts +2 -0
  27. package/dist/chunking/e2e/constants.d.ts.map +1 -0
  28. package/dist/chunking/e2e/constants.js +2 -0
  29. package/dist/chunking/e2e/constants.js.map +1 -0
  30. package/dist/chunking/e2e/seed-datasets.script.d.ts +20 -0
  31. package/dist/chunking/e2e/seed-datasets.script.d.ts.map +1 -0
  32. package/dist/chunking/e2e/seed-datasets.script.js +134 -0
  33. package/dist/chunking/e2e/seed-datasets.script.js.map +1 -0
  34. package/dist/chunking/introspect.d.ts +40 -0
  35. package/dist/chunking/introspect.d.ts.map +1 -0
  36. package/dist/chunking/introspect.js +187 -0
  37. package/dist/chunking/introspect.js.map +1 -0
  38. package/dist/chunking/partition-slices.d.ts +14 -0
  39. package/dist/chunking/partition-slices.d.ts.map +1 -0
  40. package/dist/chunking/partition-slices.js +111 -0
  41. package/dist/chunking/partition-slices.js.map +1 -0
  42. package/dist/chunking/planner.d.ts +3 -0
  43. package/dist/chunking/planner.d.ts.map +1 -0
  44. package/dist/chunking/planner.js +343 -0
  45. package/dist/chunking/planner.js.map +1 -0
  46. package/dist/chunking/services/distribution-source.d.ts +11 -0
  47. package/dist/chunking/services/distribution-source.d.ts.map +1 -0
  48. package/dist/chunking/services/distribution-source.js +60 -0
  49. package/dist/chunking/services/distribution-source.js.map +1 -0
  50. package/dist/chunking/services/metadata-source.d.ts +4 -0
  51. package/dist/chunking/services/metadata-source.d.ts.map +1 -0
  52. package/dist/chunking/services/metadata-source.js +138 -0
  53. package/dist/chunking/services/metadata-source.js.map +1 -0
  54. package/dist/chunking/services/row-probe.d.ts +14 -0
  55. package/dist/chunking/services/row-probe.d.ts.map +1 -0
  56. package/dist/chunking/services/row-probe.js +62 -0
  57. package/dist/chunking/services/row-probe.js.map +1 -0
  58. package/dist/chunking/splitter.d.ts +20 -0
  59. package/dist/chunking/splitter.d.ts.map +1 -0
  60. package/dist/chunking/splitter.js +76 -0
  61. package/dist/chunking/splitter.js.map +1 -0
  62. package/dist/chunking/sql.d.ts +20 -0
  63. package/dist/chunking/sql.d.ts.map +1 -0
  64. package/dist/chunking/sql.js +304 -0
  65. package/dist/chunking/sql.js.map +1 -0
  66. package/dist/chunking/strategies/equal-width-split.d.ts +4 -0
  67. package/dist/chunking/strategies/equal-width-split.d.ts.map +1 -0
  68. package/dist/chunking/strategies/equal-width-split.js +46 -0
  69. package/dist/chunking/strategies/equal-width-split.js.map +1 -0
  70. package/dist/chunking/strategies/group-by-key-split.d.ts +3 -0
  71. package/dist/chunking/strategies/group-by-key-split.d.ts.map +1 -0
  72. package/dist/chunking/strategies/group-by-key-split.js +54 -0
  73. package/dist/chunking/strategies/group-by-key-split.js.map +1 -0
  74. package/dist/chunking/strategies/metadata-single-chunk.d.ts +3 -0
  75. package/dist/chunking/strategies/metadata-single-chunk.d.ts.map +1 -0
  76. package/dist/chunking/strategies/metadata-single-chunk.js +5 -0
  77. package/dist/chunking/strategies/metadata-single-chunk.js.map +1 -0
  78. package/dist/chunking/strategies/quantile-range-split.d.ts +5 -0
  79. package/dist/chunking/strategies/quantile-range-split.d.ts.map +1 -0
  80. package/dist/chunking/strategies/quantile-range-split.js +132 -0
  81. package/dist/chunking/strategies/quantile-range-split.js.map +1 -0
  82. package/dist/chunking/strategies/refinement.d.ts +4 -0
  83. package/dist/chunking/strategies/refinement.d.ts.map +1 -0
  84. package/dist/chunking/strategies/refinement.js +61 -0
  85. package/dist/chunking/strategies/refinement.js.map +1 -0
  86. package/dist/chunking/strategies/string-prefix-split.d.ts +4 -0
  87. package/dist/chunking/strategies/string-prefix-split.d.ts.map +1 -0
  88. package/dist/chunking/strategies/string-prefix-split.js +73 -0
  89. package/dist/chunking/strategies/string-prefix-split.js.map +1 -0
  90. package/dist/chunking/strategies/temporal-bucket-split.d.ts +4 -0
  91. package/dist/chunking/strategies/temporal-bucket-split.d.ts.map +1 -0
  92. package/dist/chunking/strategies/temporal-bucket-split.js +67 -0
  93. package/dist/chunking/strategies/temporal-bucket-split.js.map +1 -0
  94. package/dist/chunking/strategy-policy.d.ts +3 -0
  95. package/dist/chunking/strategy-policy.d.ts.map +1 -0
  96. package/dist/chunking/strategy-policy.js +4 -0
  97. package/dist/chunking/strategy-policy.js.map +1 -0
  98. package/dist/chunking/types.d.ts +139 -0
  99. package/dist/chunking/types.d.ts.map +1 -0
  100. package/dist/chunking/types.js +2 -0
  101. package/dist/chunking/types.js.map +1 -0
  102. package/dist/chunking/utils/binary-string.d.ts +8 -0
  103. package/dist/chunking/utils/binary-string.d.ts.map +1 -0
  104. package/dist/chunking/utils/binary-string.js +52 -0
  105. package/dist/chunking/utils/binary-string.js.map +1 -0
  106. package/dist/chunking/utils/ids.d.ts +4 -0
  107. package/dist/chunking/utils/ids.d.ts.map +1 -0
  108. package/dist/chunking/utils/ids.js +11 -0
  109. package/dist/chunking/utils/ids.js.map +1 -0
  110. package/dist/chunking/utils/ranges.d.ts +5 -0
  111. package/dist/chunking/utils/ranges.d.ts.map +1 -0
  112. package/dist/chunking/utils/ranges.js +19 -0
  113. package/dist/chunking/utils/ranges.js.map +1 -0
  114. package/dist/detect.d.ts +13 -0
  115. package/dist/detect.d.ts.map +1 -0
  116. package/dist/detect.js +113 -0
  117. package/dist/detect.js.map +1 -0
  118. package/dist/index.d.ts +3 -0
  119. package/dist/index.d.ts.map +1 -1
  120. package/dist/index.js +1 -0
  121. package/dist/index.js.map +1 -1
  122. package/dist/logging.d.ts +12 -0
  123. package/dist/logging.d.ts.map +1 -0
  124. package/dist/logging.js +61 -0
  125. package/dist/logging.js.map +1 -0
  126. package/dist/options.d.ts +151 -4
  127. package/dist/options.d.ts.map +1 -1
  128. package/dist/options.js +161 -109
  129. package/dist/options.js.map +1 -1
  130. package/dist/payload.d.ts +7 -17
  131. package/dist/payload.d.ts.map +1 -1
  132. package/dist/payload.js +7 -19
  133. package/dist/payload.js.map +1 -1
  134. package/dist/planner.d.ts +10 -8
  135. package/dist/planner.d.ts.map +1 -1
  136. package/dist/planner.js +76 -97
  137. package/dist/planner.js.map +1 -1
  138. package/dist/plugin.d.ts +4 -3
  139. package/dist/plugin.d.ts.map +1 -1
  140. package/dist/plugin.js +311 -215
  141. package/dist/plugin.js.map +1 -1
  142. package/dist/queries.d.ts +21 -0
  143. package/dist/queries.d.ts.map +1 -0
  144. package/dist/queries.js +113 -0
  145. package/dist/queries.js.map +1 -0
  146. package/dist/runtime.d.ts +14 -0
  147. package/dist/runtime.d.ts.map +1 -1
  148. package/dist/runtime.js +162 -83
  149. package/dist/runtime.js.map +1 -1
  150. package/dist/sdk.d.ts +12 -0
  151. package/dist/sdk.d.ts.map +1 -0
  152. package/dist/sdk.js +9 -0
  153. package/dist/sdk.js.map +1 -0
  154. package/dist/state.d.ts +16 -28
  155. package/dist/state.d.ts.map +1 -1
  156. package/dist/state.js +73 -127
  157. package/dist/state.js.map +1 -1
  158. package/dist/table-config.d.ts +9 -0
  159. package/dist/table-config.d.ts.map +1 -0
  160. package/dist/table-config.js +2 -0
  161. package/dist/table-config.js.map +1 -0
  162. package/dist/types.d.ts +49 -114
  163. package/dist/types.d.ts.map +1 -1
  164. package/package.json +31 -2
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Seeds ClickHouse tables for smart-chunking E2E tests.
4
+ *
5
+ * Run once manually:
6
+ * bun run packages/plugin-backfill/src/chunking/e2e/seed-datasets.script.ts
7
+ *
8
+ * Requires CLICKHOUSE_HOST/CLICKHOUSE_URL + CLICKHOUSE_PASSWORD env vars.
9
+ * Creates tables if they don't exist, truncates them, and re-inserts data.
10
+ */
11
+ import { randomBytes } from 'node:crypto';
12
+ import { getRequiredEnv, createLiveExecutor } from '@chkit/clickhouse/e2e-testkit';
13
+ import { TABLE_PREFIX } from './constants.js';
14
/**
 * Produces random filler text for the `padding` column.
 *
 * @param bytes Number of random bytes to draw.
 * @returns The bytes rendered as hex, i.e. a string of `bytes * 2` characters.
 */
function pad(bytes) {
    const filler = randomBytes(bytes);
    return filler.toString('hex');
}
17
/**
 * Formats a January 2026 timestamp string at the top of the given hour.
 *
 * @param day Day of month; left-padded with zeros to two digits.
 * @param hour Hour of day; left-padded with zeros to two digits.
 * @returns A string shaped like `2026-01-DD HH:00:00`.
 */
function dayHour(day, hour) {
    const twoDigits = (value) => String(value).padStart(2, '0');
    const dd = twoDigits(day);
    const hh = twoDigits(hour);
    return `2026-01-${dd} ${hh}:00:00`;
}
20
/**
 * Dataset definitions used to seed the smart-chunking E2E tables.
 *
 * Each entry carries the table-name suffix, the column list, the ORDER BY and
 * PARTITION BY expressions, and a `generate()` function that builds the rows
 * in memory. Both datasets produce 10,000 rows with 512 random bytes
 * (1,024 hex characters) of padding per row.
 */
export const datasets = [
    {
        name: 'skewed_power_law',
        columns: [
            'tenant_id String',
            'seq UInt64',
            'event_time DateTime',
            'padding String',
        ].join(', '),
        orderBy: '(tenant_id, seq)',
        partitionBy: 'toYYYYMM(event_time)',
        generate() {
            // 80%: one dominant tenant, 8,000 rows.
            const dominant = Array.from({ length: 8000 }, (_, seq) => ({
                tenant_id: 'mega-corp',
                seq,
                event_time: dayHour((seq % 28) + 1, seq % 24),
                padding: pad(512),
            }));
            // 20%: 200 small tenants with 10 rows each, 2,000 rows.
            const longTail = Array.from({ length: 200 }, (_, tenant) => tenant).flatMap((tenant) => {
                const tenantId = `tenant-${String(tenant).padStart(4, '0')}`;
                return Array.from({ length: 10 }, (_, seq) => ({
                    tenant_id: tenantId,
                    seq,
                    event_time: dayHour(((tenant * 10 + seq) % 28) + 1, (tenant + seq) % 24),
                    padding: pad(512),
                }));
            });
            return dominant.concat(longTail);
        },
    },
    {
        name: 'multiple_hot_keys',
        columns: [
            'tenant_id String',
            'seq UInt64',
            'event_time DateTime',
            'padding String',
        ].join(', '),
        orderBy: '(tenant_id, seq)',
        partitionBy: 'toYYYYMM(event_time)',
        generate() {
            // 3 hot tenants at roughly 30% each: 3,000 rows each, 9,000 rows.
            const hot = ['alpha-corp', 'beta-corp', 'gamma-corp'].flatMap((tenantId) => Array.from({ length: 3000 }, (_, seq) => ({
                tenant_id: tenantId,
                seq,
                event_time: dayHour((seq % 28) + 1, seq % 24),
                padding: pad(512),
            })));
            // 10%: 100 small tenants with 10 rows each, 1,000 rows.
            const small = Array.from({ length: 100 }, (_, tenant) => tenant).flatMap((tenant) => {
                const tenantId = `small-${String(tenant).padStart(4, '0')}`;
                return Array.from({ length: 10 }, (_, seq) => ({
                    tenant_id: tenantId,
                    seq,
                    event_time: dayHour(((tenant * 10 + seq) % 28) + 1, (tenant + seq) % 24),
                    padding: pad(512),
                }));
            });
            return hot.concat(small);
        },
    },
];
94
// Maximum number of rows sent per insert call.
const BATCH_SIZE = 5000;
/**
 * Creates (if missing), truncates and repopulates one table per entry in
 * `datasets`, then logs the resulting row count of each table.
 *
 * Tables are named `${TABLE_PREFIX}_${dataset.name}` inside the database
 * taken from the environment. The executor is closed even when a step fails.
 */
async function seed() {
    const env = getRequiredEnv();
    const executor = createLiveExecutor(env);
    const db = env.clickhouseDatabase;
    try {
        for (const dataset of datasets) {
            const fqn = `${db}.${TABLE_PREFIX}_${dataset.name}`;
            console.log(`\n--- Seeding ${fqn} ---`);
            await executor.command(`
CREATE TABLE IF NOT EXISTS ${fqn} (
${dataset.columns}
) ENGINE = MergeTree()
PARTITION BY ${dataset.partitionBy}
ORDER BY ${dataset.orderBy}
`);
            console.log(' Table ensured.');
            await executor.command(`TRUNCATE TABLE ${fqn}`);
            console.log(' Truncated.');
            const rows = dataset.generate();
            // Insert in fixed-size batches, reporting cumulative progress.
            let offset = 0;
            while (offset < rows.length) {
                const end = offset + BATCH_SIZE;
                await executor.insert({ table: fqn, values: rows.slice(offset, end) });
                console.log(` Inserted ${Math.min(end, rows.length)} / ${rows.length} rows`);
                offset = end;
            }
            // Verify
            const counted = await executor.query(`SELECT count() AS cnt FROM ${fqn} SETTINGS select_sequential_consistency = 1`);
            const result = counted[0];
            console.log(` Verified: ${result?.cnt} rows`);
        }
    }
    finally {
        await executor.close();
    }
    console.log('\nDone!');
}
// Script entry point: report any failure and exit with a non-zero status.
seed().catch((error) => {
    console.error(error);
    process.exit(1);
});
134
+ //# sourceMappingURL=seed-datasets.script.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"seed-datasets.script.js","sourceRoot":"","sources":["../../../src/chunking/e2e/seed-datasets.script.ts"],"names":[],"mappings":";AAEA;;;;;;;;GAQG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AACzC,OAAO,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAA;AAElF,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAA;AAU7C,SAAS,GAAG,CAAC,KAAa;IACxB,OAAO,WAAW,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAA;AAC3C,CAAC;AAED,SAAS,OAAO,CAAC,GAAW,EAAE,IAAY;IACxC,OAAO,WAAW,MAAM,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,QAAQ,CAAA;AACzF,CAAC;AAED,MAAM,CAAC,MAAM,QAAQ,GAAoB;IACvC;QACE,IAAI,EAAE,kBAAkB;QACxB,OAAO,EAAE;YACP,kBAAkB;YAClB,YAAY;YACZ,qBAAqB;YACrB,gBAAgB;SACjB,CAAC,IAAI,CAAC,IAAI,CAAC;QACZ,OAAO,EAAE,kBAAkB;QAC3B,WAAW,EAAE,sBAAsB;QACnC,QAAQ;YACN,MAAM,IAAI,GAA8B,EAAE,CAAA;YAE1C,2CAA2C;YAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9B,IAAI,CAAC,IAAI,CAAC;oBACR,SAAS,EAAE,WAAW;oBACtB,GAAG,EAAE,CAAC;oBACN,UAAU,EAAE,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC;oBACzC,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC;iBAClB,CAAC,CAAA;YACJ,CAAC;YAED,oDAAoD;YACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,IAAI,CAAC,IAAI,CAAC;wBACR,SAAS,EAAE,UAAU,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;wBACjD,GAAG,EAAE,CAAC;wBACN,UAAU,EAAE,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;wBAC1D,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC;qBAClB,CAAC,CAAA;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,IAAI,CAAA;QACb,CAAC;KACF;IACD;QACE,IAAI,EAAE,mBAAmB;QACzB,OAAO,EAAE;YACP,kBAAkB;YAClB,YAAY;YACZ,qBAAqB;YACrB,gBAAgB;SACjB,CAAC,IAAI,CAAC,IAAI,CAAC;QACZ,OAAO,EAAE,kBAAkB;QAC3B,WAAW,EAAE,sBAAsB;QACnC,QAAQ;YACN,MAAM,IAAI,GAA8B,EAAE,CAAA;YAE1C,0DAA0D;YAC1D,KAAK,MAAM,MAAM,IAAI,CAAC,YAAY,EAAE,WAAW,EAAE,YAAY,CAAC,EAAE,CAAC;gBAC/D,KAAK,IAAI,CAAC,GA
AG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC9B,IAAI,CAAC,IAAI,CAAC;wBACR,SAAS,EAAE,MAAM;wBACjB,GAAG,EAAE,CAAC;wBACN,UAAU,EAAE,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,GAAG,EAAE,CAAC;wBACzC,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC;qBAClB,CAAC,CAAA;gBACJ,CAAC;YACH,CAAC;YAED,oDAAoD;YACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,IAAI,CAAC,IAAI,CAAC;wBACR,SAAS,EAAE,SAAS,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;wBAChD,GAAG,EAAE,CAAC;wBACN,UAAU,EAAE,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC;wBAC1D,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC;qBAClB,CAAC,CAAA;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,IAAI,CAAA;QACb,CAAC;KACF;CACF,CAAA;AAED,MAAM,UAAU,GAAG,IAAI,CAAA;AAEvB,KAAK,UAAU,IAAI;IACjB,MAAM,GAAG,GAAG,cAAc,EAAE,CAAA;IAC5B,MAAM,QAAQ,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAA;IACxC,MAAM,EAAE,GAAG,GAAG,CAAC,kBAAkB,CAAA;IAEjC,IAAI,CAAC;QACH,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,KAAK,GAAG,GAAG,YAAY,IAAI,OAAO,CAAC,IAAI,EAAE,CAAA;YAC/C,MAAM,GAAG,GAAG,GAAG,EAAE,IAAI,KAAK,EAAE,CAAA;YAC5B,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,MAAM,CAAC,CAAA;YAEvC,MAAM,QAAQ,CAAC,OAAO,CAAC;qCACQ,GAAG;YAC5B,OAAO,CAAC,OAAO;;uBAEJ,OAAO,CAAC,WAAW;mBACvB,OAAO,CAAC,OAAO;OAC3B,CAAC,CAAA;YACF,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAA;YAE/B,MAAM,QAAQ,CAAC,OAAO,CAAC,kBAAkB,GAAG,EAAE,CAAC,CAAA;YAC/C,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAA;YAE3B,MAAM,IAAI,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAA;YAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;gBACjD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAA;gBAC3C,MAAM,QAAQ,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAA;gBACpD,OAAO,CAAC,GAAG,CAAC,cAAc,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,CAAC,MAAM,OAAO,CAAC,CAAA;YAC1F,CAAC;YAED,SAAS;YACT,MAAM,CAAC,MAAM,CAAC,GAAG,MAAM,QAAQ,CAAC,KAAK,CAC
nC,8BAA8B,GAAG,6CAA6C,CAC/E,CAAA;YACD,OAAO,CAAC,GAAG,CAAC,eAAe,MAAM,EAAE,GAAG,OAAO,CAAC,CAAA;QAChD,CAAC;IACH,CAAC;YAAS,CAAC;QACT,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAA;IACxB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;AACxB,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;AACjB,CAAC,CAAC,CAAA"}
@@ -0,0 +1,40 @@
1
+ import type { PartitionInfo, SortKeyInfo } from './types.js';
2
/**
 * Aggregates the active parts in system.parts per partition_id and returns one
 * PartitionInfo per partition. Partitions whose maxTime compares below `from`,
 * or whose minTime compares at or above `to`, are filtered out.
 */
+ export declare function queryPartitionInfo(input: {
3
+ database: string;
4
+ table: string;
5
+ from?: string;
6
+ to?: string;
7
+ query: <T>(sql: string) => Promise<T[]>;
8
+ }): Promise<PartitionInfo[]>;
9
/**
 * Reads the table's sorting_key from system.tables and its column types from
 * system.columns, and returns one SortKeyInfo per sort-key expression that
 * could be resolved to a column. Resolves to an empty array when the table
 * has no sorting key.
 */
+ export declare function querySortKeys(input: {
10
+ database: string;
11
+ table: string;
12
+ query: <T>(sql: string) => Promise<T[]>;
13
+ }): Promise<SortKeyInfo[]>;
14
/** Returns the first entry of querySortKeys(input), or undefined when there is none. */
+ export declare function querySortKeyInfo(input: {
15
+ database: string;
16
+ table: string;
17
+ query: <T>(sql: string) => Promise<T[]>;
18
+ }): Promise<SortKeyInfo | undefined>;
19
/**
 * Queries min/max of `sortKeyColumn` (rendered with toString) per partition
 * for the given partition ids. The returned Map is keyed by partition id;
 * an empty `partitionIds` array yields an empty Map without running a query.
 */
+ export declare function querySortKeyRanges(input: {
20
+ database: string;
21
+ table: string;
22
+ sortKeyColumn: string;
23
+ partitionIds: string[];
24
+ query: <T>(sql: string) => Promise<T[]>;
25
+ }): Promise<Map<string, {
26
+ min: string;
27
+ max: string;
28
+ }>>;
29
/**
 * Runs queryPartitionInfo followed by querySortKeys and returns both results;
 * `sortKey` is the first element of `sortKeys` (undefined when it is empty).
 */
+ export declare function introspectTable(input: {
30
+ database: string;
31
+ table: string;
32
+ from?: string;
33
+ to?: string;
34
+ query: <T>(sql: string) => Promise<T[]>;
35
+ }): Promise<{
36
+ partitions: PartitionInfo[];
37
+ sortKey?: SortKeyInfo;
38
+ sortKeys: SortKeyInfo[];
39
+ }>;
40
+ //# sourceMappingURL=introspect.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"introspect.d.ts","sourceRoot":"","sources":["../../src/chunking/introspect.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,YAAY,CAAA;AAkB5D,wBAAsB,kBAAkB,CAAC,KAAK,EAAE;IAC9C,QAAQ,EAAE,MAAM,CAAA;IAChB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,EAAE,CAAC,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,CAAC,EAAE,CAAC,CAAA;CACxC,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC,CA+C3B;AA2FD,wBAAsB,aAAa,CAAC,KAAK,EAAE;IACzC,QAAQ,EAAE,MAAM,CAAA;IAChB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,CAAC,EAAE,CAAC,CAAA;CACxC,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CA6CzB;AAED,wBAAsB,gBAAgB,CAAC,KAAK,EAAE;IAC5C,QAAQ,EAAE,MAAM,CAAA;IAChB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,CAAC,EAAE,CAAC,CAAA;CACxC,GAAG,OAAO,CAAC,WAAW,GAAG,SAAS,CAAC,CAEnC;AAED,wBAAsB,kBAAkB,CAAC,KAAK,EAAE;IAC9C,QAAQ,EAAE,MAAM,CAAA;IAChB,KAAK,EAAE,MAAM,CAAA;IACb,aAAa,EAAE,MAAM,CAAA;IACrB,YAAY,EAAE,MAAM,EAAE,CAAA;IACtB,KAAK,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,CAAC,EAAE,CAAC,CAAA;CACxC,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAiBrD;AAED,wBAAsB,eAAe,CAAC,KAAK,EAAE;IAC3C,QAAQ,EAAE,MAAM,CAAA;IAChB,KAAK,EAAE,MAAM,CAAA;IACb,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,EAAE,CAAC,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,CAAC,EAAE,CAAC,CAAA;CACxC,GAAG,OAAO,CAAC;IAAE,UAAU,EAAE,aAAa,EAAE,CAAC;IAAC,OAAO,CAAC,EAAE,WAAW,CAAC;IAAC,QAAQ,EAAE,WAAW,EAAE,CAAA;CAAE,CAAC,CAS3F"}
@@ -0,0 +1,187 @@
1
// Exact ClickHouse type names treated as numeric sort keys.
const NUMERIC_TYPES = new Set([
    'Int8',
    'Int16',
    'Int32',
    'Int64',
    'Int128',
    'Int256',
    'UInt8',
    'UInt16',
    'UInt32',
    'UInt64',
    'UInt128',
    'UInt256',
    'Float32',
    'Float64',
]);
// Exact ClickHouse type names treated as date/time sort keys.
const DATETIME_TYPES = new Set(['Date', 'Date32', 'DateTime', 'DateTime64']);
/**
 * Maps a ClickHouse column type name to a sort-key category.
 *
 * @param type Type name as reported by `system.columns`.
 * @returns `'numeric'` for an exact match in NUMERIC_TYPES; `'datetime'` for an
 *   exact match in DATETIME_TYPES or a type starting with `DateTime64(` or
 *   `DateTime('`; `'string'` for everything else, including wrapped types
 *   such as `Nullable(...)`.
 */
function classifySortKeyType(type) {
    if (NUMERIC_TYPES.has(type)) {
        return 'numeric';
    }
    // Parameterised forms carry a precision or a quoted timezone argument.
    const parameterisedPrefixes = ['DateTime64(', "DateTime('"];
    const isDateTime = DATETIME_TYPES.has(type)
        || parameterisedPrefixes.some((prefix) => type.startsWith(prefix));
    return isDateTime ? 'datetime' : 'string';
}
18
+ export async function queryPartitionInfo(input) {
19
+ // Force replica sync on the target table before reading system.parts.
20
+ // select_sequential_consistency is only effective on user tables, not system
21
+ // tables, so this preliminary query ensures the replica has caught up with
22
+ // all pending writes before we inspect part metadata.
23
+ await input.query(`SELECT 1 FROM ${input.database}.${input.table} LIMIT 1 SETTINGS select_sequential_consistency = 1`);
24
+ const rows = await input.query(`SELECT
25
+ partition_id,
26
+ toString(sum(rows)) AS total_rows,
27
+ toString(sum(bytes_on_disk)) AS total_bytes,
28
+ toString(sum(data_uncompressed_bytes)) AS total_uncompressed_bytes,
29
+ toString(min(min_time)) AS min_time,
30
+ toString(max(max_time)) AS max_time
31
+ FROM system.parts
32
+ WHERE database = '${input.database}'
33
+ AND table = '${input.table}'
34
+ AND active = 1
35
+ GROUP BY partition_id
36
+ ORDER BY partition_id
37
+ SETTINGS select_sequential_consistency = 1`);
38
+ const partitions = rows.map((row) => ({
39
+ partitionId: row.partition_id,
40
+ rows: Number(row.total_rows),
41
+ bytesOnDisk: Number(row.total_bytes),
42
+ bytesUncompressed: Number(row.total_uncompressed_bytes ?? row.total_bytes),
43
+ minTime: new Date(row.min_time).toISOString(),
44
+ maxTime: new Date(row.max_time).toISOString(),
45
+ }));
46
+ return partitions.filter((p) => {
47
+ if (input.from && p.maxTime < input.from)
48
+ return false;
49
+ if (input.to && p.minTime >= input.to)
50
+ return false;
51
+ return true;
52
+ });
53
+ }
54
/**
 * Splits a `sorting_key` string into its top-level expressions.
 *
 * @param sortingKey Comma-separated sort-key expression list.
 * @returns The trimmed, non-empty expressions in their original order.
 */
function extractSortKeyColumns(sortingKey) {
    const expressions = [];
    for (const piece of splitTopLevelCsv(sortingKey)) {
        const cleaned = piece.trim();
        if (cleaned.length > 0) {
            expressions.push(cleaned);
        }
    }
    return expressions;
}
59
/**
 * Splits a comma-separated expression list on top-level commas only.
 *
 * Commas nested inside parentheses, or inside a quoted span (single quotes,
 * double quotes or backticks), do not split. Within a quoted span a backslash
 * escapes the next character, so `'\\'` closes correctly and `'\''` does not
 * close early.
 *
 * @param input Expression list, e.g. `a, f(b, c)`.
 * @returns The trimmed parts in order. Empty parts between two commas are
 *   kept; an empty trailing part is dropped.
 */
function splitTopLevelCsv(input) {
    const parts = [];
    let current = '';
    let depth = 0;
    // Quote character of the span currently being read, if any.
    let quote;
    // True when the previous character inside a quoted span was an unescaped
    // backslash, meaning the current character is escaped.
    let escaped = false;
    for (const char of input) {
        if (quote !== undefined) {
            current += char;
            if (escaped) {
                escaped = false;
            }
            else if (char === '\\') {
                escaped = true;
            }
            else if (char === quote) {
                quote = undefined;
            }
            continue;
        }
        if (char === '\'' || char === '"' || char === '`') {
            quote = char;
            current += char;
            continue;
        }
        if (char === '(') {
            depth += 1;
            current += char;
            continue;
        }
        if (char === ')') {
            // Clamp at zero so a stray ')' cannot suppress later splits.
            depth = Math.max(0, depth - 1);
            current += char;
            continue;
        }
        if (char === ',' && depth === 0) {
            parts.push(current.trim());
            current = '';
            continue;
        }
        current += char;
    }
    const tail = current.trim();
    if (tail.length > 0) {
        parts.push(tail);
    }
    return parts;
}
102
/**
 * Resolves a sort-key expression to the single known column it refers to.
 *
 * @param expression One sort-key expression, e.g. `seq` or `toDate(event_time)`.
 * @param knownColumns Set of column names that exist on the table.
 * @returns The trimmed expression when it is itself a known column; otherwise
 *   the one known column named among its identifiers; `undefined` when it
 *   names zero or more than one distinct known column.
 */
function resolveSortKeyColumn(expression, knownColumns) {
    const candidate = expression.trim();
    if (knownColumns.has(candidate)) {
        return candidate;
    }
    // Collect the distinct known columns named anywhere in the expression.
    const referenced = new Set();
    for (const [token] of candidate.matchAll(/\b[A-Za-z_][A-Za-z0-9_]*\b/g)) {
        if (token && knownColumns.has(token)) {
            referenced.add(token);
        }
    }
    if (referenced.size !== 1) {
        return undefined;
    }
    const [only] = referenced;
    return only;
}
116
/**
 * Guesses the column behind a sort-key expression when no column list is
 * available.
 *
 * @param expression One sort-key expression.
 * @returns The trimmed expression when it is a bare identifier; otherwise the
 *   last identifier-like token found in it; `undefined` when there is none.
 */
function resolveSortKeyColumnWithoutSchema(expression) {
    const candidate = expression.trim();
    const isBareIdentifier = /^[A-Za-z_][A-Za-z0-9_]*$/.test(candidate);
    if (isBareIdentifier) {
        return candidate;
    }
    // Keep overwriting so the final value is the last identifier seen.
    let lastIdentifier;
    for (const [token] of candidate.matchAll(/\b[A-Za-z_][A-Za-z0-9_]*\b/g)) {
        if (token) {
            lastIdentifier = token;
        }
    }
    return lastIdentifier;
}
126
/**
 * Describes the columns behind a table's sorting key.
 *
 * Reads `sorting_key` from `system.tables`, splits it into expressions, then
 * reads column names and types from `system.columns` to resolve each
 * expression to a column and classify its type.
 *
 * @param input.database Database name, escaped before use as a string literal.
 * @param input.table Table name, escaped before use as a string literal.
 * @param input.query Executes a SQL string and resolves to the result rows.
 * @returns One `{ column, type, category }` entry per expression that could be
 *   resolved, in sort-key order; an empty array when the table has no sorting
 *   key.
 */
export async function querySortKeys(input) {
    // Render a value as a single-quoted ClickHouse string literal, escaping
    // backslashes and quotes so the value cannot terminate the literal early.
    const sqlString = (value) => `'${String(value).replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;
    const databaseLiteral = sqlString(input.database);
    const tableLiteral = sqlString(input.table);
    const tableRows = await input.query(`SELECT sorting_key FROM system.tables WHERE database = ${databaseLiteral} AND name = ${tableLiteral}`);
    const sortingKey = tableRows[0]?.sorting_key;
    if (!sortingKey)
        return [];
    const expressions = extractSortKeyColumns(sortingKey);
    if (expressions.length === 0)
        return [];
    const columnRows = await input.query(`SELECT name, type FROM system.columns WHERE database = ${databaseLiteral} AND table = ${tableLiteral}`);
    const typeByName = new Map(columnRows
        .filter((row) => Boolean(row.name))
        .map((row) => [row.name, row.type]));
    const knownColumns = new Set(typeByName.keys());
    if (knownColumns.size === 0) {
        // No named columns came back: guess the column from the expression text
        // and take the type positionally, falling back to the first row's type.
        return expressions.flatMap((expression, index) => {
            const column = resolveSortKeyColumnWithoutSchema(expression);
            const type = columnRows[index]?.type ?? columnRows[0]?.type;
            if (!column || !type)
                return [];
            return [{
                    column,
                    type,
                    category: classifySortKeyType(type),
                }];
        });
    }
    return expressions.flatMap((expression) => {
        const column = resolveSortKeyColumn(expression, knownColumns);
        // Expressions that do not map to exactly one known column are skipped.
        if (!column)
            return [];
        const type = typeByName.get(column) ?? 'String';
        return [{
                column,
                type,
                category: classifySortKeyType(type),
            }];
    });
}
164
/**
 * Convenience wrapper returning only the leading sort key.
 *
 * @param input Same input as `querySortKeys`.
 * @returns The first entry from `querySortKeys(input)`, or `undefined` when it
 *   returns an empty array.
 */
export async function querySortKeyInfo(input) {
    const [leading] = await querySortKeys(input);
    return leading;
}
167
+ export async function querySortKeyRanges(input) {
168
+ if (input.partitionIds.length === 0)
169
+ return new Map();
170
+ const inList = input.partitionIds.map((id) => `'${id}'`).join(', ');
171
+ const rows = await input.query(`SELECT _partition_id AS partition_id, toString(min(${input.sortKeyColumn})) AS min_val, toString(max(${input.sortKeyColumn})) AS max_val FROM ${input.database}.${input.table} WHERE _partition_id IN (${inList}) GROUP BY _partition_id SETTINGS select_sequential_consistency = 1`);
172
+ const result = new Map();
173
+ for (const row of rows) {
174
+ result.set(row.partition_id, { min: row.min_val, max: row.max_val });
175
+ }
176
+ return result;
177
+ }
178
/**
 * Gathers partition and sort-key metadata for a table.
 *
 * Partition info is queried first (honouring `from`/`to`), then the sort keys.
 *
 * @param input Database, table, optional `from`/`to` bounds, and the query
 *   function.
 * @returns `{ partitions, sortKey, sortKeys }`, where `sortKey` is the first
 *   element of `sortKeys` (`undefined` when there are none).
 */
export async function introspectTable(input) {
    const partitions = await queryPartitionInfo(input);
    // The sort-key lookup receives only the fields it needs, not from/to.
    const { database, table, query } = input;
    const sortKeys = await querySortKeys({ database, table, query });
    const [sortKey] = sortKeys;
    return { partitions, sortKey, sortKeys };
}
187
+ //# sourceMappingURL=introspect.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"introspect.js","sourceRoot":"","sources":["../../src/chunking/introspect.ts"],"names":[],"mappings":"AAEA,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC;IAC5B,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ;IACrD,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS;IAC3D,SAAS,EAAE,SAAS;CACrB,CAAC,CAAA;AAEF,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,UAAU,EAAE,YAAY,CAAC,CAAC,CAAA;AAE5E,SAAS,mBAAmB,CAAC,IAAY;IACvC,IAAI,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,SAAS,CAAA;IAC7C,IAAI,cAAc,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,UAAU,CAAA;IAC/C,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO,UAAU,CAAA;IACrD,IAAI,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC;QAAE,OAAO,UAAU,CAAA;IACpD,OAAO,QAAQ,CAAA;AACjB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,KAMxC;IACC,sEAAsE;IACtE,6EAA6E;IAC7E,2EAA2E;IAC3E,sDAAsD;IACtD,MAAM,KAAK,CAAC,KAAK,CACf,iBAAiB,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,KAAK,qDAAqD,CACpG,CAAA;IAED,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,KAAK,CAQ5B;;;;;;;;oBAQgB,KAAK,CAAC,QAAQ;iBACjB,KAAK,CAAC,KAAK;;;;2CAIe,CACxC,CAAA;IAED,MAAM,UAAU,GAAoB,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACrD,WAAW,EAAE,GAAG,CAAC,YAAY;QAC7B,IAAI,EAAE,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC;QAC5B,WAAW,EAAE,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC;QACpC,iBAAiB,EAAE,MAAM,CAAC,GAAG,CAAC,wBAAwB,IAAI,GAAG,CAAC,WAAW,CAAC;QAC1E,OAAO,EAAE,IAAI,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE;QAC7C,OAAO,EAAE,IAAI,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE;KAC9C,CAAC,CAAC,CAAA;IAEH,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QAC7B,IAAI,KAAK,CAAC,IAAI,IAAI,CAAC,CAAC,OAAO,GAAG,KAAK,CAAC,IAAI;YAAE,OAAO,KAAK,CAAA;QACtD,IAAI,KAAK,CAAC,EAAE,IAAI,CAAC,CAAC,OAAO,IAAI,KAAK,CAAC,EAAE;YAAE,OAAO,KAAK,CAAA;QACnD,OAAO,IAAI,CAAA;IACb,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,qBAAqB,CAAC,UAAkB;IAC/C,OAAO,gBAAgB,CAAC,UAAU,CAAC;SAChC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAkB,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;AACtD,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa;IACrC,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,OAAO,GAAG,EA
AE,CAAA;IAChB,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,KAA4B,CAAA;IAEhC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QAClD,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAA;QACzB,IAAI,IAAI,KAAK,SAAS;YAAE,SAAQ;QAEhC,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,IAAI,IAAI,CAAA;YACf,IAAI,IAAI,KAAK,KAAK,IAAI,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;gBAChD,KAAK,GAAG,SAAS,CAAA;YACnB,CAAC;YACD,SAAQ;QACV,CAAC;QAED,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;YAClC,KAAK,GAAG,IAAI,CAAA;YACZ,OAAO,IAAI,IAAI,CAAA;YACf,SAAQ;QACV,CAAC;QAED,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;YACjB,KAAK,IAAI,CAAC,CAAA;YACV,OAAO,IAAI,IAAI,CAAA;YACf,SAAQ;QACV,CAAC;QAED,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;YACjB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC,CAAA;YAC9B,OAAO,IAAI,IAAI,CAAA;YACf,SAAQ;QACV,CAAC;QAED,IAAI,IAAI,KAAK,GAAG,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;YAChC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAA;YAC1B,OAAO,GAAG,EAAE,CAAA;YACZ,SAAQ;QACV,CAAC;QAED,OAAO,IAAI,IAAI,CAAA;IACjB,CAAC;IAED,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAA;IAC5B,CAAC;IAED,OAAO,KAAK,CAAA;AACd,CAAC;AAED,SAAS,oBAAoB,CAAC,UAAkB,EAAE,YAAyB;IACzE,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,CAAA;IACjC,IAAI,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;QAC9B,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,6BAA6B,CAAC,CAAC;SAC5E,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;SACxB,MAAM,CAAC,CAAC,UAAU,EAAwB,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAA;IAEpE,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAA;IACrG,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,OAAO,CAAC,CAAC,CAAC,CAAA;IACnB,CAAC;IAED,OAAO,SAAS,CAAA;AAClB,CAAC;AAED,SAAS,iCAAiC,CAAC,UAAkB;IAC3D,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,CAAA;IACjC,IAAI,0BAA0B,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC7C,OAAO,OAAO,CAAA;IAChB,CAAC;IAED,MAAM,WAAW,GAAG,KAAK,CAAC,IA
AI,CAAC,OAAO,CAAC,QAAQ,CAAC,6BAA6B,CAAC,CAAC;SAC5E,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;SACxB,MAAM,CAAC,CAAC,UAAU,EAAwB,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAA;IAEpE,OAAO,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;AACjF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,KAInC;IACC,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,KAAK,CACjC,2DAA2D,KAAK,CAAC,QAAQ,iBAAiB,KAAK,CAAC,KAAK,GAAG,CACzG,CAAA;IAED,MAAM,UAAU,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,WAAW,CAAA;IAC5C,IAAI,CAAC,UAAU;QAAE,OAAO,EAAE,CAAA;IAE1B,MAAM,WAAW,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAA;IACrD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAEvC,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC,KAAK,CAClC,2DAA2D,KAAK,CAAC,QAAQ,kBAAkB,KAAK,CAAC,KAAK,GAAG,CAC1G,CAAA;IACD,MAAM,UAAU,GAAG,IAAI,GAAG,CACxB,UAAU;SACP,MAAM,CAAC,CAAC,GAAG,EAAyC,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;SACzE,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC,CACtC,CAAA;IACD,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAA;IAE/C,IAAI,YAAY,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QAC5B,OAAO,WAAW,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,KAAK,EAAE,EAAE;YAC/C,MAAM,MAAM,GAAG,iCAAiC,CAAC,UAAU,CAAC,CAAA;YAC5D,MAAM,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,EAAE,IAAI,IAAI,UAAU,CAAC,CAAC,CAAC,EAAE,IAAI,CAAA;YAC3D,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI;gBAAE,OAAO,EAAE,CAAA;YAE/B,OAAO,CAAC;oBACN,MAAM;oBACN,IAAI;oBACJ,QAAQ,EAAE,mBAAmB,CAAC,IAAI,CAAC;iBACpC,CAAC,CAAA;QACJ,CAAC,CAAC,CAAA;IACJ,CAAC;IAED,OAAO,WAAW,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,EAAE;QACxC,MAAM,MAAM,GAAG,oBAAoB,CAAC,UAAU,EAAE,YAAY,CAAC,CAAA;QAC7D,IAAI,CAAC,MAAM;YAAE,OAAO,EAAE,CAAA;QACtB,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAA;QAC/C,OAAO,CAAC;gBACN,MAAM;gBACN,IAAI;gBACJ,QAAQ,EAAE,mBAAmB,CAAC,IAAI,CAAC;aACpC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,KAItC;IACC,OAAO,CAAC,MAAM,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AACxC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,KAMxC;IACC,IAAI,KAAK,CAAC,YAAY,CA
AC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,GAAG,EAAE,CAAA;IAErD,MAAM,MAAM,GAAG,KAAK,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACnE,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,KAAK,CAK5B,sDAAsD,KAAK,CAAC,aAAa,+BAA+B,KAAK,CAAC,aAAa,sBAAsB,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,KAAK,4BAA4B,MAAM,qEAAqE,CACtR,CAAA;IAED,MAAM,MAAM,GAAG,IAAI,GAAG,EAAwC,CAAA;IAC9D,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,YAAY,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,OAAO,EAAE,GAAG,EAAE,GAAG,CAAC,OAAO,EAAE,CAAC,CAAA;IACtE,CAAC;IACD,OAAO,MAAM,CAAA;AACf,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,KAMrC;IACC,MAAM,UAAU,GAAG,MAAM,kBAAkB,CAAC,KAAK,CAAC,CAAA;IAClD,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC;QACnC,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,KAAK,EAAE,KAAK,CAAC,KAAK;KACnB,CAAC,CAAA;IAEF,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAA;AACvD,CAAC"}
@@ -0,0 +1,14 @@
1
+ import type { ChunkEstimate, EstimateConfidence, EstimateReason, Partition, PartitionSlice, ChunkDerivationStep, ChunkRange } from './types.js';
2
+ export declare function buildRootSlice(partition: Partition): PartitionSlice;
3
+ export declare function buildSliceEstimate(partition: Partition, rows: number, confidence: EstimateConfidence, reason: EstimateReason): ChunkEstimate;
4
+ export declare function buildSliceFromRows(partition: Partition, input: {
5
+ ranges: ChunkRange[];
6
+ rows: number;
7
+ focusedValue?: PartitionSlice['analysis']['focusedValue'];
8
+ confidence: EstimateConfidence;
9
+ reason: EstimateReason;
10
+ lineage: ChunkDerivationStep[];
11
+ }): PartitionSlice;
12
+ export declare function getTargetChunkRows(partition: Partition, targetChunkBytes: number): number;
13
+ export declare function mergeAdjacentSlices(slices: PartitionSlice[], targetChunkBytes: number): PartitionSlice[];
14
+ //# sourceMappingURL=partition-slices.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"partition-slices.d.ts","sourceRoot":"","sources":["../../src/chunking/partition-slices.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,aAAa,EACb,kBAAkB,EAClB,cAAc,EACd,SAAS,EACT,cAAc,EACd,mBAAmB,EACnB,UAAU,EACX,MAAM,YAAY,CAAA;AAEnB,wBAAgB,cAAc,CAAC,SAAS,EAAE,SAAS,GAAG,cAAc,CAenE;AAED,wBAAgB,kBAAkB,CAChC,SAAS,EAAE,SAAS,EACpB,IAAI,EAAE,MAAM,EACZ,UAAU,EAAE,kBAAkB,EAC9B,MAAM,EAAE,cAAc,GACrB,aAAa,CAef;AAED,wBAAgB,kBAAkB,CAChC,SAAS,EAAE,SAAS,EACpB,KAAK,EAAE;IACL,MAAM,EAAE,UAAU,EAAE,CAAA;IACpB,IAAI,EAAE,MAAM,CAAA;IACZ,YAAY,CAAC,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC,cAAc,CAAC,CAAA;IACzD,UAAU,EAAE,kBAAkB,CAAA;IAC9B,MAAM,EAAE,cAAc,CAAA;IACtB,OAAO,EAAE,mBAAmB,EAAE,CAAA;CAC/B,GACA,cAAc,CAUhB;AAED,wBAAgB,kBAAkB,CAChC,SAAS,EAAE,SAAS,EACpB,gBAAgB,EAAE,MAAM,GACvB,MAAM,CAGR;AAED,wBAAgB,mBAAmB,CACjC,MAAM,EAAE,cAAc,EAAE,EACxB,gBAAgB,EAAE,MAAM,GACvB,cAAc,EAAE,CAuClB"}
@@ -0,0 +1,111 @@
1
/**
 * Builds the slice that stands for a whole partition.
 *
 * The slice carries no ranges and an empty lineage, and its estimate is
 * copied straight from the partition's own row and byte counters, tagged
 * with confidence `'high'` and reason `'partition-metadata'`.
 *
 * @param partition Object providing `partitionId`, `rows`,
 *   `bytesCompressed` and `bytesUncompressed`.
 * @returns A new slice object `{ partitionId, ranges, estimate, analysis }`.
 */
export function buildRootSlice(partition) {
    const { partitionId, rows, bytesCompressed, bytesUncompressed } = partition;
    // The numbers come directly from the partition, hence the fixed tags.
    const estimate = {
        rows,
        bytesCompressed,
        bytesUncompressed,
        confidence: 'high',
        reason: 'partition-metadata',
    };
    const analysis = { lineage: [] };
    return { partitionId, ranges: [], estimate, analysis };
}
17
/**
 * Estimates the size of a slice holding `rows` rows of `partition`.
 *
 * Both byte counters are the partition's counters scaled by
 * `rows / partition.rows` and rounded with `Math.round`. When
 * `partition.rows` is not greater than zero, both byte counters are 0.
 *
 * @param partition Object providing `rows`, `bytesCompressed` and
 *   `bytesUncompressed`.
 * @param rows Row count attributed to the slice; returned unchanged.
 * @param confidence Passed through unchanged into the result.
 * @param reason Passed through unchanged into the result.
 * @returns `{ rows, bytesCompressed, bytesUncompressed, confidence, reason }`.
 */
export function buildSliceEstimate(partition, rows, confidence, reason) {
    let bytesCompressed = 0;
    let bytesUncompressed = 0;
    if (partition.rows > 0) {
        // Fraction of the partition's rows that this slice accounts for.
        const share = rows / partition.rows;
        bytesCompressed = Math.round(share * partition.bytesCompressed);
        bytesUncompressed = Math.round(share * partition.bytesUncompressed);
    }
    return { rows, bytesCompressed, bytesUncompressed, confidence, reason };
}
32
/**
 * Assembles a slice of `partition` from an already known row count.
 *
 * The estimate is delegated to `buildSliceEstimate`; ranges, focused value
 * and lineage are taken from `input` as given.
 *
 * @param partition Object providing `partitionId` plus the counters that
 *   `buildSliceEstimate` reads.
 * @param input Object with `ranges`, `rows`, optional `focusedValue`,
 *   `confidence`, `reason` and `lineage`.
 * @returns A new slice object `{ partitionId, ranges, estimate, analysis }`.
 */
export function buildSliceFromRows(partition, input) {
    const { ranges, rows, focusedValue, confidence, reason, lineage } = input;
    const estimate = buildSliceEstimate(partition, rows, confidence, reason);
    return {
        partitionId: partition.partitionId,
        ranges,
        estimate,
        // `focusedValue` is always present as a key, even when undefined.
        analysis: { focusedValue, lineage },
    };
}
43
/**
 * Converts a target chunk size in bytes into a row count for `partition`.
 *
 * The result is `targetChunkBytes * partition.rows / partition.bytesUncompressed`
 * and is not rounded. When `partition.bytesUncompressed` is zero or negative,
 * the partition's full row count is returned instead.
 *
 * @param partition Object providing `rows` and `bytesUncompressed`.
 * @param targetChunkBytes Desired uncompressed bytes per chunk.
 * @returns The row count corresponding to `targetChunkBytes`.
 */
export function getTargetChunkRows(partition, targetChunkBytes) {
    const { rows, bytesUncompressed } = partition;
    // No usable byte size to scale by: fall back to the whole partition.
    const lacksByteSize = bytesUncompressed <= 0;
    return lacksByteSize ? rows : (targetChunkBytes * rows) / bytesUncompressed;
}
48
/**
 * Walks `slices` in order and folds each slice into its predecessor when
 * the pair may be combined.
 *
 * Two consecutive slices are combined only when all of these hold:
 *  - neither has `analysis.focusedValue` set,
 *  - `haveSameTrailingRanges` accepts their range lists,
 *  - their summed `estimate.bytesUncompressed` is at most
 *    `targetChunkBytes * 1.1`.
 *
 * A combined slice keeps every field of the earlier slice except `ranges`
 * (produced by `mergeRanges`) and the three numeric estimate counters,
 * which are summed.
 *
 * @param slices Slices in the order they should be considered.
 * @param targetChunkBytes Target uncompressed size of a chunk in bytes.
 * @returns The input array itself when it has at most one element,
 *   otherwise a new array of merged slices.
 */
export function mergeAdjacentSlices(slices, targetChunkBytes) {
    if (slices.length <= 1)
        return slices;
    // Merged slices may overshoot the target by up to 10%.
    const byteCeiling = targetChunkBytes * 1.1;
    const output = [];
    let pending;
    for (const candidate of slices) {
        if (!pending) {
            pending = candidate;
            continue;
        }
        const mergeable = !(pending.analysis.focusedValue || candidate.analysis.focusedValue) &&
            haveSameTrailingRanges(pending.ranges, candidate.ranges) &&
            pending.estimate.bytesUncompressed + candidate.estimate.bytesUncompressed <= byteCeiling;
        if (mergeable) {
            const before = pending.estimate;
            const added = candidate.estimate;
            pending = {
                ...pending,
                ranges: mergeRanges(pending.ranges, candidate.ranges),
                estimate: {
                    ...before,
                    rows: before.rows + added.rows,
                    bytesCompressed: before.bytesCompressed + added.bytesCompressed,
                    bytesUncompressed: before.bytesUncompressed + added.bytesUncompressed,
                },
            };
        }
        else {
            // Close the slice built so far and start a new one.
            output.push(pending);
            pending = candidate;
        }
    }
    if (pending)
        output.push(pending);
    return output;
}
82
/**
 * Combines two range lists dimension by dimension.
 *
 * For every range in `left`, the range in `right` with the same
 * `dimensionIndex` is looked up. When one exists, the result spans from the
 * left range's `from` to the right range's `to`; otherwise the left range
 * object is reused as is. Ranges that appear only in `right` are not
 * included.
 *
 * @param left Ranges of the earlier slice.
 * @param right Ranges of the later slice.
 * @returns A new array with one entry per entry of `left`.
 */
function mergeRanges(left, right) {
    const combined = [];
    for (const leftRange of left) {
        const { dimensionIndex, from } = leftRange;
        const partner = right.find((candidate) => candidate.dimensionIndex === dimensionIndex);
        combined.push(partner === undefined ? leftRange : { dimensionIndex, from, to: partner.to });
    }
    return combined;
}
94
+ function haveSameTrailingRanges(left, right) {
95
+ if (left.length !== right.length)
96
+ return false;
97
+ let differingDimensions = 0;
98
+ for (const leftRange of left) {
99
+ const rightRange = right.find((candidate) => candidate.dimensionIndex === leftRange.dimensionIndex);
100
+ if (!rightRange)
101
+ return false;
102
+ const same = leftRange.from === rightRange.from && leftRange.to === rightRange.to;
103
+ if (!same) {
104
+ differingDimensions += 1;
105
+ if (leftRange.to !== rightRange.from)
106
+ return false;
107
+ }
108
+ }
109
+ return differingDimensions <= 1;
110
+ }
111
+ //# sourceMappingURL=partition-slices.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"partition-slices.js","sourceRoot":"","sources":["../../src/chunking/partition-slices.ts"],"names":[],"mappings":"AAUA,MAAM,UAAU,cAAc,CAAC,SAAoB;IACjD,OAAO;QACL,WAAW,EAAE,SAAS,CAAC,WAAW;QAClC,MAAM,EAAE,EAAE;QACV,QAAQ,EAAE;YACR,IAAI,EAAE,SAAS,CAAC,IAAI;YACpB,eAAe,EAAE,SAAS,CAAC,eAAe;YAC1C,iBAAiB,EAAE,SAAS,CAAC,iBAAiB;YAC9C,UAAU,EAAE,MAAM;YAClB,MAAM,EAAE,oBAAoB;SAC7B;QACD,QAAQ,EAAE;YACR,OAAO,EAAE,EAAE;SACZ;KACF,CAAA;AACH,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,SAAoB,EACpB,IAAY,EACZ,UAA8B,EAC9B,MAAsB;IAEtB,MAAM,eAAe,GAAG,SAAS,CAAC,IAAI,GAAG,CAAC;QACxC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,eAAe,CAAC;QACjE,CAAC,CAAC,CAAC,CAAA;IACL,MAAM,iBAAiB,GAAG,SAAS,CAAC,IAAI,GAAG,CAAC;QAC1C,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,iBAAiB,CAAC;QACnE,CAAC,CAAC,CAAC,CAAA;IAEL,OAAO;QACL,IAAI;QACJ,eAAe;QACf,iBAAiB;QACjB,UAAU;QACV,MAAM;KACP,CAAA;AACH,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,SAAoB,EACpB,KAOC;IAED,OAAO;QACL,WAAW,EAAE,SAAS,CAAC,WAAW;QAClC,MAAM,EAAE,KAAK,CAAC,MAAM;QACpB,QAAQ,EAAE,kBAAkB,CAAC,SAAS,EAAE,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,MAAM,CAAC;QACnF,QAAQ,EAAE;YACR,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,OAAO,EAAE,KAAK,CAAC,OAAO;SACvB;KACF,CAAA;AACH,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,SAAoB,EACpB,gBAAwB;IAExB,IAAI,SAAS,CAAC,iBAAiB,IAAI,CAAC;QAAE,OAAO,SAAS,CAAC,IAAI,CAAA;IAC3D,OAAO,CAAC,gBAAgB,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,iBAAiB,CAAA;AAC1E,CAAC;AAED,MAAM,UAAU,mBAAmB,CACjC,MAAwB,EACxB,gBAAwB;IAExB,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,MAAM,CAAA;IAErC,MAAM,MAAM,GAAqB,EAAE,CAAA;IACnC,IAAI,OAAmC,CAAA;IAEvC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,GAAG,KAAK,CAAA;YACf,SAAQ;QACV,CAAC;QAED,MAAM,QAAQ,GACZ,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY;YAC9B,CAAC,KAAK,CAAC,QAAQ,CAAC,YAAY;YAC5B,sBAAsB,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC;YACpD,OAAO,CAAC,QAAQ,CAAC,iBAAiB,GAAG,KAAK,CAAC,QAAQ,CAAC,iBAAiB,IAAI,gBAAgB,GAAG,GAAG,CAAA;QAEjG,IAAI,CAAC,QAAQ,EAAE,CAAC;Y
ACd,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACpB,OAAO,GAAG,KAAK,CAAA;YACf,SAAQ;QACV,CAAC;QAED,OAAO,GAAG;YACR,GAAG,OAAO;YACV,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC;YACjD,QAAQ,EAAE;gBACR,GAAG,OAAO,CAAC,QAAQ;gBACnB,IAAI,EAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI;gBACjD,eAAe,EAAE,OAAO,CAAC,QAAQ,CAAC,eAAe,GAAG,KAAK,CAAC,QAAQ,CAAC,eAAe;gBAClF,iBAAiB,EAAE,OAAO,CAAC,QAAQ,CAAC,iBAAiB,GAAG,KAAK,CAAC,QAAQ,CAAC,iBAAiB;aAEzF;SACF,CAAA;IACH,CAAC;IAED,IAAI,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACjC,OAAO,MAAM,CAAA;AACf,CAAC;AAED,SAAS,WAAW,CAAC,IAAkB,EAAE,KAAmB;IAC1D,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE;QAC5B,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,cAAc,KAAK,SAAS,CAAC,cAAc,CAAC,CAAA;QACnG,OAAO,UAAU,KAAK,SAAS;YAC7B,CAAC,CAAC,SAAS;YACX,CAAC,CAAC;gBACA,cAAc,EAAE,SAAS,CAAC,cAAc;gBACxC,IAAI,EAAE,SAAS,CAAC,IAAI;gBACpB,EAAE,EAAE,UAAU,CAAC,EAAE;aAClB,CAAA;IACL,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,sBAAsB,CAAC,IAAkB,EAAE,KAAmB;IACrE,IAAI,IAAI,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM;QAAE,OAAO,KAAK,CAAA;IAE9C,IAAI,mBAAmB,GAAG,CAAC,CAAA;IAE3B,KAAK,MAAM,SAAS,IAAI,IAAI,EAAE,CAAC;QAC7B,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,cAAc,KAAK,SAAS,CAAC,cAAc,CAAC,CAAA;QACnG,IAAI,CAAC,UAAU;YAAE,OAAO,KAAK,CAAA;QAE7B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,KAAK,UAAU,CAAC,IAAI,IAAI,SAAS,CAAC,EAAE,KAAK,UAAU,CAAC,EAAE,CAAA;QACjF,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,mBAAmB,IAAI,CAAC,CAAA;YACxB,IAAI,SAAS,CAAC,EAAE,KAAK,UAAU,CAAC,IAAI;gBAAE,OAAO,KAAK,CAAA;QACpD,CAAC;IACH,CAAC;IAED,OAAO,mBAAmB,IAAI,CAAC,CAAA;AACjC,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { ChunkPlan, GenerateChunkPlanInput } from './types.js';
2
+ export declare function generateChunkPlan(input: GenerateChunkPlanInput): Promise<ChunkPlan>;
3
+ //# sourceMappingURL=planner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"planner.d.ts","sourceRoot":"","sources":["../../src/chunking/planner.ts"],"names":[],"mappings":"AAyBA,OAAO,KAAK,EAEV,SAAS,EACT,sBAAsB,EAQvB,MAAM,YAAY,CAAA;AASnB,wBAAsB,iBAAiB,CAAC,KAAK,EAAE,sBAAsB,GAAG,OAAO,CAAC,SAAS,CAAC,CAyEzF"}