retold-facto 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/.claude/launch.json +11 -0
  2. package/.dockerignore +8 -0
  3. package/.quackage.json +19 -0
  4. package/Dockerfile +26 -0
  5. package/bin/retold-facto.js +909 -0
  6. package/examples/facto-government-data.sqlite +0 -0
  7. package/examples/government-data-catalog.json +137 -0
  8. package/examples/government-data-loader.js +1432 -0
  9. package/package.json +91 -0
  10. package/scripts/facto-download.js +425 -0
  11. package/source/Retold-Facto.js +1042 -0
  12. package/source/services/Retold-Facto-BeaconProvider.js +511 -0
  13. package/source/services/Retold-Facto-CatalogManager.js +1252 -0
  14. package/source/services/Retold-Facto-DataLakeService.js +1642 -0
  15. package/source/services/Retold-Facto-DatasetManager.js +417 -0
  16. package/source/services/Retold-Facto-IngestEngine.js +1315 -0
  17. package/source/services/Retold-Facto-ProjectionEngine.js +3960 -0
  18. package/source/services/Retold-Facto-RecordManager.js +360 -0
  19. package/source/services/Retold-Facto-SchemaManager.js +1110 -0
  20. package/source/services/Retold-Facto-SourceFolderScanner.js +2243 -0
  21. package/source/services/Retold-Facto-SourceManager.js +730 -0
  22. package/source/services/Retold-Facto-StoreConnectionManager.js +441 -0
  23. package/source/services/Retold-Facto-ThroughputMonitor.js +478 -0
  24. package/source/services/web-app/codemirror-entry.js +7 -0
  25. package/source/services/web-app/pict-app/Pict-Application-Facto-Configuration.json +9 -0
  26. package/source/services/web-app/pict-app/Pict-Application-Facto.js +70 -0
  27. package/source/services/web-app/pict-app/Pict-Facto-Bundle.js +11 -0
  28. package/source/services/web-app/pict-app/providers/Pict-Provider-Facto-UI.js +66 -0
  29. package/source/services/web-app/pict-app/providers/Pict-Provider-Facto.js +69 -0
  30. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Catalog.js +93 -0
  31. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Connections.js +42 -0
  32. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Datasets.js +605 -0
  33. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Projections.js +188 -0
  34. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Scanner.js +80 -0
  35. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Schema.js +116 -0
  36. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Sources.js +104 -0
  37. package/source/services/web-app/pict-app/views/PictView-Facto-Catalog.js +526 -0
  38. package/source/services/web-app/pict-app/views/PictView-Facto-Datasets.js +173 -0
  39. package/source/services/web-app/pict-app/views/PictView-Facto-Ingest.js +259 -0
  40. package/source/services/web-app/pict-app/views/PictView-Facto-Layout.js +191 -0
  41. package/source/services/web-app/pict-app/views/PictView-Facto-Projections.js +231 -0
  42. package/source/services/web-app/pict-app/views/PictView-Facto-Records.js +326 -0
  43. package/source/services/web-app/pict-app/views/PictView-Facto-Scanner.js +624 -0
  44. package/source/services/web-app/pict-app/views/PictView-Facto-Sources.js +201 -0
  45. package/source/services/web-app/pict-app/views/PictView-Facto-Throughput.js +456 -0
  46. package/source/services/web-app/pict-app-full/Pict-Application-Facto-Full-Configuration.json +14 -0
  47. package/source/services/web-app/pict-app-full/Pict-Application-Facto-Full.js +391 -0
  48. package/source/services/web-app/pict-app-full/providers/PictRouter-Facto-Configuration.json +56 -0
  49. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-BottomBar.js +68 -0
  50. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Connections.js +340 -0
  51. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Dashboard.js +149 -0
  52. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Dashboards.js +819 -0
  53. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Datasets.js +178 -0
  54. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-IngestJobs.js +99 -0
  55. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Layout.js +62 -0
  56. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-MappingEditor.js +158 -0
  57. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-ProjectionDetail.js +1120 -0
  58. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Projections.js +172 -0
  59. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-QueryPanel.js +119 -0
  60. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-RecordViewer.js +663 -0
  61. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Records.js +648 -0
  62. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Scanner.js +1017 -0
  63. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaDetail.js +1404 -0
  64. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaDocEditor.js +1036 -0
  65. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaEditor.js +636 -0
  66. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaResearch.js +357 -0
  67. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceDetail.js +822 -0
  68. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceEditor.js +1036 -0
  69. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceResearch.js +487 -0
  70. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Sources.js +165 -0
  71. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Throughput.js +439 -0
  72. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-TopBar.js +335 -0
  73. package/source/services/web-app/pict-app-full/views/projections/Facto-Projections-Constants.js +71 -0
  74. package/source/services/web-app/web/chart.min.js +20 -0
  75. package/source/services/web-app/web/codemirror-bundle.js +30099 -0
  76. package/source/services/web-app/web/css/facto-themes.css +467 -0
  77. package/source/services/web-app/web/css/facto.css +502 -0
  78. package/source/services/web-app/web/index.html +28 -0
  79. package/source/services/web-app/web/retold-facto.js +12138 -0
  80. package/source/services/web-app/web/retold-facto.js.map +1 -0
  81. package/source/services/web-app/web/retold-facto.min.js +2 -0
  82. package/source/services/web-app/web/retold-facto.min.js.map +1 -0
  83. package/source/services/web-app/web/simple/index.html +17 -0
  84. package/test/Facto_Browser_Integration_tests.js +798 -0
  85. package/test/RetoldFacto_tests.js +4117 -0
  86. package/test/fixtures/weather-readings.csv +17 -0
  87. package/test/fixtures/weather-stations.csv +9 -0
  88. package/test/model/MeadowModel-Extended.json +8497 -0
  89. package/test/model/MeadowModel-PICT.json +1 -0
  90. package/test/model/MeadowModel.json +1355 -0
  91. package/test/model/ddl/Facto.ddl +225 -0
  92. package/test/model/fable-configuration.json +14 -0
package/package.json ADDED
@@ -0,0 +1,91 @@
1
+ {
2
+ "name": "retold-facto",
3
+ "version": "0.0.4",
4
+ "description": "Data warehouse and knowledge graph storage for the Retold ecosystem.",
5
+ "main": "source/Retold-Facto.js",
6
+ "bin": {
7
+ "retold-facto": "bin/retold-facto.js"
8
+ },
9
+ "scripts": {
10
+ "start": "node bin/retold-facto.js",
11
+ "coverage": "npx quack coverage",
12
+ "test": "npx quack test",
13
+ "test-browser": "npx mocha test/Facto_Browser_Integration_tests.js -u tdd --exit --timeout 120000",
14
+ "build": "npx quack build",
15
+ "build-codemirror": "npx esbuild source/services/web-app/codemirror-entry.js --bundle --outfile=source/services/web-app/web/codemirror-bundle.js --format=iife --global-name=CodeMirrorModules --platform=browser --target=es2018",
16
+ "build-test-model": "cd test && npx stricture -i model/ddl/Facto.ddl",
17
+ "docker-build": "docker build -t retold-facto .",
18
+ "docker-run": "docker run -p 8386:8386 retold-facto"
19
+ },
20
+ "mocha": {
21
+ "spec": "test/RetoldFacto_tests.js",
22
+ "diff": true,
23
+ "extension": [
24
+ "js"
25
+ ],
26
+ "package": "./package.json",
27
+ "reporter": "spec",
28
+ "slow": "75",
29
+ "timeout": "5000",
30
+ "ui": "tdd",
31
+ "watch-files": [
32
+ "source/**/*.js",
33
+ "test/**/*.js"
34
+ ],
35
+ "watch-ignore": [
36
+ "lib/vendor"
37
+ ]
38
+ },
39
+ "repository": {
40
+ "type": "git",
41
+ "url": "https://github.com/stevenvelozo/retold-facto.git"
42
+ },
43
+ "keywords": [
44
+ "data",
45
+ "warehouse",
46
+ "knowledge-graph",
47
+ "certainty",
48
+ "ingest"
49
+ ],
50
+ "author": "Steven Velozo <steven@velozo.com> (http://velozo.com/)",
51
+ "license": "MIT",
52
+ "bugs": {
53
+ "url": "https://github.com/stevenvelozo/retold-facto/issues"
54
+ },
55
+ "homepage": "https://github.com/stevenvelozo/retold-facto",
56
+ "devDependencies": {
57
+ "chai": "^4.5.0",
58
+ "meadow-connection-sqlite": "^1.0.18",
59
+ "puppeteer": "^24.8.0",
60
+ "quackage": "^1.0.65",
61
+ "stricture": "^4.0.2",
62
+ "supertest": "^7.2.2"
63
+ },
64
+ "dependencies": {
65
+ "@codemirror/lang-markdown": "^6.5.0",
66
+ "@codemirror/state": "^6.6.0",
67
+ "@codemirror/view": "^6.40.0",
68
+ "bibliograph": "^0.1.4",
69
+ "codemirror": "^6.0.2",
70
+ "fable": "^3.1.67",
71
+ "fable-serviceproviderbase": "^3.0.19",
72
+ "fast-xml-parser": "^5.5.8",
73
+ "meadow": "^2.0.33",
74
+ "meadow-connection-manager": "^1.0.0",
75
+ "meadow-connection-mysql": "^1.0.14",
76
+ "meadow-endpoints": "^4.0.14",
77
+ "meadow-integration": "^1.0.21",
78
+ "orator": "^6.0.4",
79
+ "orator-serviceserver-restify": "^2.0.9",
80
+ "orator-static-server": "^2.0.4",
81
+ "pict": "^1.0.359",
82
+ "pict-router": "^1.0.9",
83
+ "pict-section-flow": "^0.0.17",
84
+ "pict-section-modal": "^0.0.1",
85
+ "pict-section-histogram": "^1.0.0",
86
+ "pict-section-markdowneditor": "^1.0.7",
87
+ "pict-section-objecteditor": "^1.0.1",
88
+ "stricture": "^4.0.2",
89
+ "xlsx": "^0.18.5"
90
+ }
91
+ }
package/scripts/facto-download.js ADDED
@@ -0,0 +1,425 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Retold Facto — Offline Data Lake Downloader (CLI)
4
+ *
5
+ * Thin CLI wrapper around RetoldFactoDataLakeService.
6
+ * Reads download-catalog.json and downloads public datasets into a
7
+ * well-organized folder structure for stable, repeatable ingestion.
8
+ *
9
+ * Usage:
10
+ * node scripts/facto-download.js --dry-run
11
+ * node scripts/facto-download.js --dry-run --tier 2
12
+ * node scripts/facto-download.js --id iso-4217-currencies
13
+ * node scripts/facto-download.js --list
14
+ * node scripts/facto-download.js --category 01_foundational_reference
15
+ * node scripts/facto-download.js --tier 2 --force
16
+ *
17
+ * @author Steven Velozo <steven@velozo.com>
18
+ */
19
+ const libFable = require('fable');
20
+ const libPath = require('path');
21
+
22
+ const libRetoldFactoDataLakeService = require('../source/services/Retold-Facto-DataLakeService.js');
23
+
24
+ // ================================================================
25
+ // Constants
26
+ // ================================================================
27
+
28
// Catalog file handed to the DataLake service as CatalogPath, resolved relative to this script.
// NOTE(review): confirm documentation/datasets/download-catalog.json actually ships with the
// published package; main() exits with "Failed to load catalog." when loadCatalog() returns
// a falsy value.
const CATALOG_PATH = libPath.join(__dirname, '..', 'documentation', 'datasets', 'download-catalog.json');
// Data directory handed to the DataLake service as DataDir unless --data-dir overrides it.
const DEFAULT_DATA_DIR = libPath.join(__dirname, '..', 'dist', 'retold-facto-raw-data');

// ================================================================
// CLI Argument Parsing
// ================================================================

/**
 * Options collected from the command line and read by main().
 *
 * dryRun   - report what would be downloaded without downloading
 * list     - report the status of every matching dataset
 * force    - re-download datasets that are already cached
 * tier     - integer tier filter, or null for no filter
 * category - category filter, or null for no filter
 * id       - single dataset id filter, or null for no filter
 * dataDir  - absolute path of the data directory
 */
let _Options = (
	{
		dryRun: false,
		list: false,
		force: false,
		tier: null,
		category: null,
		id: null,
		dataDir: DEFAULT_DATA_DIR
	});

// Flags that consume the following argument as their value.
const _ValueFlags = ['--tier', '-t', '--category', '-c', '--id', '--data-dir', '-d'];

let tmpArgs = process.argv.slice(2);
for (let i = 0; i < tmpArgs.length; i++)
{
	let tmpArg = tmpArgs[i];

	if (_ValueFlags.includes(tmpArg))
	{
		let tmpValue = tmpArgs[i + 1];

		// Report a missing value explicitly instead of letting the flag fall
		// through to the misleading "Unknown argument" error.
		if (!tmpValue)
		{
			console.error(`Missing value for argument: ${tmpArg}`);
			console.error('Use --help for usage information.');
			process.exit(1);
		}

		// The value has been consumed; skip it on the next iteration.
		i++;

		if (tmpArg === '--tier' || tmpArg === '-t')
		{
			let tmpTier = parseInt(tmpValue, 10);
			// Reject non-numeric tiers (e.g. "--tier --force") rather than filtering on NaN.
			if (Number.isNaN(tmpTier))
			{
				console.error(`Invalid tier (expected an integer): ${tmpValue}`);
				console.error('Use --help for usage information.');
				process.exit(1);
			}
			_Options.tier = tmpTier;
		}
		else if (tmpArg === '--category' || tmpArg === '-c')
		{
			_Options.category = tmpValue;
		}
		else if (tmpArg === '--id')
		{
			_Options.id = tmpValue;
		}
		else
		{
			// --data-dir / -d
			_Options.dataDir = libPath.resolve(tmpValue);
		}
	}
	else if (tmpArg === '--dry-run' || tmpArg === '-n')
	{
		_Options.dryRun = true;
	}
	else if (tmpArg === '--list' || tmpArg === '-l')
	{
		_Options.list = true;
	}
	else if (tmpArg === '--force' || tmpArg === '-f')
	{
		_Options.force = true;
	}
	else if (tmpArg === '--skip-api')
	{
		// Default behavior — flag exists for explicitness
	}
	else if (tmpArg === '--help' || tmpArg === '-h')
	{
		console.log('Retold Facto — Offline Data Lake Downloader');
		console.log('');
		console.log('Usage:');
		console.log(' node scripts/facto-download.js [options]');
		console.log('');
		console.log('Options:');
		console.log(' --dry-run, -n Show what would be downloaded without downloading');
		console.log(' --list, -l List all datasets with cached/missing status');
		console.log(' --tier, -t <n> Only datasets at tier N or lower');
		console.log(' --category, -c <name> Only datasets in this category');
		console.log(' --id <id> Download a single dataset by ID');
		// The documented default now matches DEFAULT_DATA_DIR (it previously claimed "data/").
		console.log(' --data-dir, -d <path> Override data directory (default: dist/retold-facto-raw-data/)');
		console.log(' --force, -f Re-download even if already cached');
		console.log(' --skip-api Skip API-only datasets (default behavior)');
		console.log(' --help, -h Show this help');
		console.log('');
		console.log('Categories:');
		console.log(' 01_foundational_reference');
		console.log(' 02_geographic_location');
		console.log(' 03_people_cultural_entities');
		console.log(' 04_business_industry');
		console.log(' 05_media_entertainment');
		console.log('');
		console.log('Examples:');
		console.log(' node scripts/facto-download.js --dry-run');
		console.log(' node scripts/facto-download.js --dry-run --tier 1');
		console.log(' node scripts/facto-download.js --id iso-4217-currencies');
		console.log(' node scripts/facto-download.js --tier 2');
		console.log(' node scripts/facto-download.js --list');
		process.exit(0);
	}
	else
	{
		console.error(`Unknown argument: ${tmpArg}`);
		console.error('Use --help for usage information.');
		process.exit(1);
	}
}
125
+
126
+ // ================================================================
127
+ // Reporting Helpers
128
+ // ================================================================
129
+
130
/**
 * Print one status row for a catalog entry to stdout.
 *
 * @param {object} pService - Object exposing isDownloadable(dataset).
 * @param {object} pEntry - Catalog entry; its `dataset` supplies id, tier, method and size_compressed.
 * @param {string} pStatus - Status label printed at the end of the row; 'cached', 'error'
 *                           and (for non-downloadable datasets) 'missing' select the icon.
 */
function printStatusLine(pService, pEntry, pStatus)
{
	let tmpDataset = pEntry.dataset;
	let tmpDownloadable = pService.isDownloadable(tmpDataset);

	let tmpStatusIcon = ' ';
	if (pStatus === 'cached')
	{
		tmpStatusIcon = '\u2713 ';
	}
	else if (pStatus === 'error')
	{
		tmpStatusIcon = '\u2717 ';
	}
	else if (pStatus === 'missing' && !tmpDownloadable)
	{
		tmpStatusIcon = '- ';
	}

	// padEnd only exists on strings; coerce first so a numeric size or a
	// missing id/method in the catalog cannot throw and abort the listing.
	let tmpIdStr = String(tmpDataset.id);
	let tmpSizeStr = String(tmpDataset.size_compressed || '???');
	let tmpMethodLabel = tmpDownloadable ? String(tmpDataset.method) : `${tmpDataset.method} (manual)`;
	let tmpTierStr = `T${tmpDataset.tier}`;

	console.log(` ${tmpStatusIcon}[${tmpTierStr}] ${tmpIdStr.padEnd(35)} ${tmpSizeStr.padEnd(15)} ${tmpMethodLabel.padEnd(16)} ${pStatus}`);
}
155
+
156
+ // ================================================================
157
+ // Main
158
+ // ================================================================
159
+
160
/**
 * --list mode: print the status of every entry, grouped by category folder,
 * followed by cached / missing / manual / error totals.
 *
 * @param {object} pService - DataLake service instance.
 * @param {Array<object>} pEntries - Entries returned by flattenCatalog().
 */
function reportDataLakeStatus(pService, pEntries)
{
	console.log('');
	console.log('Retold Facto \u2014 Data Lake Status');
	console.log(`Data directory: ${_Options.dataDir}`);
	console.log('');

	let tmpCurrentCategory = '';
	let tmpCachedCount = 0;
	let tmpMissingCount = 0;
	let tmpErrorCount = 0;
	let tmpManualCount = 0;
	let tmpCachedSize = 0;

	for (const tmpEntry of pEntries)
	{
		// Emit a heading each time the category folder changes.
		if (tmpEntry.categoryFolder !== tmpCurrentCategory)
		{
			tmpCurrentCategory = tmpEntry.categoryFolder;
			console.log(`\n ${tmpCurrentCategory}/`);
		}

		let tmpStatus = pService.getDatasetStatus(tmpEntry);
		printStatusLine(pService, tmpEntry, tmpStatus);

		if (tmpStatus === 'cached')
		{
			tmpCachedCount++;
			let tmpDatasetDir = pService.getDatasetDir(tmpEntry.categoryFolder, tmpEntry.dataset.id);
			let tmpManifest = pService.readManifest(tmpDatasetDir);
			if (tmpManifest)
			{
				tmpCachedSize += tmpManifest.total_size || 0;
			}
		}
		else if (tmpStatus === 'error')
		{
			tmpErrorCount++;
		}
		else if (!pService.isDownloadable(tmpEntry.dataset))
		{
			tmpManualCount++;
		}
		else
		{
			tmpMissingCount++;
		}
	}

	console.log('');
	console.log(`Total: ${pEntries.length} datasets`);
	console.log(` Cached: ${tmpCachedCount} (${pService.formatSize(tmpCachedSize)} on disk)`);
	console.log(` Missing: ${tmpMissingCount} (downloadable)`);
	console.log(` API/Manual: ${tmpManualCount} (need custom scripts)`);
	if (tmpErrorCount > 0)
	{
		console.log(` Errors: ${tmpErrorCount}`);
	}
	console.log('');
}

/**
 * --dry-run mode: print what a download run would fetch, skip, or treat as
 * already cached, without downloading anything.
 *
 * @param {object} pService - DataLake service instance.
 * @param {Array<object>} pEntries - Entries returned by flattenCatalog().
 */
function reportDryRun(pService, pEntries)
{
	console.log('');
	console.log('Retold Facto \u2014 Download Dry Run');
	if (_Options.tier !== null)
	{
		console.log(`Tier filter: <= ${_Options.tier}`);
	}
	if (_Options.category)
	{
		console.log(`Category filter: ${_Options.category}`);
	}
	if (_Options.id)
	{
		console.log(`Dataset filter: ${_Options.id}`);
	}
	console.log('');

	let tmpCurrentCategory = '';
	let tmpTotalSize = 0;
	let tmpDownloadableCount = 0;
	let tmpSkippedCount = 0;
	let tmpAlreadyCachedCount = 0;

	for (const tmpEntry of pEntries)
	{
		let tmpDataset = tmpEntry.dataset;

		if (tmpEntry.categoryFolder !== tmpCurrentCategory)
		{
			tmpCurrentCategory = tmpEntry.categoryFolder;
			console.log(`\n ${tmpCurrentCategory}/`);
		}

		let tmpIsDownloadable = pService.isDownloadable(tmpDataset);
		let tmpStatus = pService.getDatasetStatus(tmpEntry);
		// NOTE(review): parseSize presumably turns the catalog size string into bytes — confirm.
		let tmpSizeBytes = pService.parseSize(tmpDataset.size_compressed);
		// Coerce before padEnd so non-string catalog values cannot throw.
		let tmpIdStr = String(tmpDataset.id);
		let tmpSizeStr = String(tmpDataset.size_compressed || '???');

		if (!tmpIsDownloadable)
		{
			console.log(` SKIP ${tmpIdStr.padEnd(35)} ${tmpSizeStr.padEnd(15)} ${tmpDataset.method} (needs custom script)`);
			tmpSkippedCount++;
		}
		else if (tmpStatus === 'cached' && !_Options.force)
		{
			console.log(` OK ${tmpIdStr.padEnd(35)} ${tmpSizeStr.padEnd(15)} already cached`);
			tmpAlreadyCachedCount++;
		}
		else
		{
			let tmpUrlCount = (tmpDataset.method === 'rest_api')
				? `${(tmpDataset.fetch_steps || []).length} step(s)`
				: `${(tmpDataset.urls || []).length} URL(s)`;
			console.log(` GET ${tmpIdStr.padEnd(35)} ${tmpSizeStr.padEnd(15)} ${tmpDataset.method} \u2192 ${tmpUrlCount}`);
			tmpTotalSize += tmpSizeBytes;
			tmpDownloadableCount++;
		}
	}

	console.log('');
	console.log('Summary:');
	console.log(` Would download: ${tmpDownloadableCount} datasets (~${pService.formatSize(tmpTotalSize)})`);
	console.log(` Already cached: ${tmpAlreadyCachedCount}`);
	console.log(` Skipped (API): ${tmpSkippedCount}`);
	console.log(` Total matched: ${pEntries.length}`);
	console.log('');
	console.log('Run without --dry-run to download.');
	console.log('');
}

/**
 * Download mode: fetch every downloadable entry that is not already cached
 * (or every downloadable entry when --force is set), one at a time.
 *
 * A failure on one dataset is counted and reported but does not stop the
 * remaining downloads; process.exitCode is set to 1 when any dataset failed.
 *
 * @param {object} pService - DataLake service instance.
 * @param {Array<object>} pEntries - Entries returned by flattenCatalog().
 * @returns {Promise<void>}
 */
async function downloadEntries(pService, pEntries)
{
	console.log('');
	console.log('Retold Facto \u2014 Downloading to Offline Data Lake');
	console.log(`Data directory: ${_Options.dataDir}`);
	console.log('');

	let tmpQueue = [];
	let tmpSkipped = [];

	for (const tmpEntry of pEntries)
	{
		let tmpDataset = tmpEntry.dataset;

		if (!pService.isDownloadable(tmpDataset))
		{
			tmpSkipped.push(tmpEntry);
			continue;
		}

		let tmpStatus = pService.getDatasetStatus(tmpEntry);
		if (tmpStatus === 'cached' && !_Options.force)
		{
			console.log(` [CACHED] ${tmpDataset.id} \u2014 already downloaded`);
			continue;
		}

		tmpQueue.push(tmpEntry);
	}

	if (tmpQueue.length === 0)
	{
		console.log('Nothing to download. All matching datasets are cached or API-only.');
		if (tmpSkipped.length > 0)
		{
			console.log(`(${tmpSkipped.length} API/manual datasets were skipped)`);
		}
		console.log('');
		return;
	}

	console.log(`Downloading ${tmpQueue.length} dataset(s)...`);
	console.log('');

	let tmpSuccessCount = 0;
	let tmpErrorCount = 0;

	for (let i = 0; i < tmpQueue.length; i++)
	{
		let tmpEntry = tmpQueue[i];
		let tmpDataset = tmpEntry.dataset;

		console.log(`[${i + 1}/${tmpQueue.length}] ${tmpDataset.name} (${tmpDataset.id})`);
		console.log(` Method: ${tmpDataset.method} | Size: ${tmpDataset.size_compressed || '???'} | Tier: ${tmpDataset.tier}`);

		let tmpManifest = null;
		try
		{
			tmpManifest = await pService.downloadDataset(tmpEntry);
		}
		catch (pError)
		{
			// One failing dataset must not abort the rest of the batch.
			console.error(` Error: ${(pError && pError.message) ? pError.message : pError}`);
		}

		if (tmpManifest && tmpManifest.status === 'complete')
		{
			console.log(` Complete: ${pService.formatSize(tmpManifest.total_size)} in ${(tmpManifest.files || []).length} file(s)`);
			tmpSuccessCount++;
		}
		else
		{
			tmpErrorCount++;
		}
		console.log('');
	}

	console.log('');
	console.log('Download complete.');
	console.log(` Success: ${tmpSuccessCount}`);
	if (tmpErrorCount > 0)
	{
		console.log(` Errors: ${tmpErrorCount}`);
		// Report failure to the calling shell/CI without cutting off pending output.
		process.exitCode = 1;
	}
	if (tmpSkipped.length > 0)
	{
		console.log(` Skipped: ${tmpSkipped.length} (API/manual \u2014 need custom scripts)`);
	}
	console.log('');
}

/**
 * CLI entry point: build the DataLake service, load and filter the catalog,
 * then run the mode selected in _Options (--list, --dry-run, or download).
 *
 * Exits the process with code 1 when the catalog cannot be loaded.
 *
 * @returns {Promise<void>}
 */
async function main()
{
	// Create a lightweight Fable instance for the service
	let tmpFable = libFable.new(
		{
			Product: 'RetoldFacto',
			// Read the version from package.json so it cannot drift from the release.
			ProductVersion: require('../package.json').version,
			LogLevel: 5
		});

	// Register and instantiate the DataLake service
	tmpFable.serviceManager.addServiceType('RetoldFactoDataLakeService', libRetoldFactoDataLakeService);
	let tmpService = tmpFable.serviceManager.instantiateServiceProvider('RetoldFactoDataLakeService',
		{
			CatalogPath: CATALOG_PATH,
			DataDir: _Options.dataDir
		});

	let tmpCatalog = tmpService.loadCatalog();
	if (!tmpCatalog)
	{
		console.error('Failed to load catalog.');
		process.exit(1);
	}

	let tmpFilters = (
		{
			tier: _Options.tier,
			category: _Options.category,
			id: _Options.id
		});

	let tmpEntries = tmpService.flattenCatalog(tmpCatalog, tmpFilters);

	if (tmpEntries.length === 0)
	{
		console.log('No datasets match the given filters.');
		return;
	}

	// --list takes precedence over --dry-run, which takes precedence over downloading.
	if (_Options.list)
	{
		reportDataLakeStatus(tmpService, tmpEntries);
		return;
	}

	if (_Options.dryRun)
	{
		reportDryRun(tmpService, tmpEntries);
		return;
	}

	await downloadEntries(tmpService, tmpEntries);
}
419
+
420
// Run the CLI; any rejection that escapes main() is reported and turned into exit code 1.
main().catch(
	(pError) =>
	{
		// A rejection value is not guaranteed to be an Error (it may even be
		// null/undefined), so do not dereference .message unconditionally.
		let tmpMessage = (pError && pError.message) ? pError.message : String(pError);
		console.error(`Fatal error: ${tmpMessage}`);
		process.exit(1);
	});