retold-facto 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/launch.json +11 -0
- package/.dockerignore +8 -0
- package/.quackage.json +19 -0
- package/Dockerfile +26 -0
- package/bin/retold-facto.js +909 -0
- package/examples/facto-government-data.sqlite +0 -0
- package/examples/government-data-catalog.json +137 -0
- package/examples/government-data-loader.js +1432 -0
- package/package.json +91 -0
- package/scripts/facto-download.js +425 -0
- package/source/Retold-Facto.js +1042 -0
- package/source/services/Retold-Facto-BeaconProvider.js +511 -0
- package/source/services/Retold-Facto-CatalogManager.js +1252 -0
- package/source/services/Retold-Facto-DataLakeService.js +1642 -0
- package/source/services/Retold-Facto-DatasetManager.js +417 -0
- package/source/services/Retold-Facto-IngestEngine.js +1315 -0
- package/source/services/Retold-Facto-ProjectionEngine.js +3960 -0
- package/source/services/Retold-Facto-RecordManager.js +360 -0
- package/source/services/Retold-Facto-SchemaManager.js +1110 -0
- package/source/services/Retold-Facto-SourceFolderScanner.js +2243 -0
- package/source/services/Retold-Facto-SourceManager.js +730 -0
- package/source/services/Retold-Facto-StoreConnectionManager.js +441 -0
- package/source/services/Retold-Facto-ThroughputMonitor.js +478 -0
- package/source/services/web-app/codemirror-entry.js +7 -0
- package/source/services/web-app/pict-app/Pict-Application-Facto-Configuration.json +9 -0
- package/source/services/web-app/pict-app/Pict-Application-Facto.js +70 -0
- package/source/services/web-app/pict-app/Pict-Facto-Bundle.js +11 -0
- package/source/services/web-app/pict-app/providers/Pict-Provider-Facto-UI.js +66 -0
- package/source/services/web-app/pict-app/providers/Pict-Provider-Facto.js +69 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Catalog.js +93 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Connections.js +42 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Datasets.js +605 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Projections.js +188 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Scanner.js +80 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Schema.js +116 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Sources.js +104 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Catalog.js +526 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Datasets.js +173 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Ingest.js +259 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Layout.js +191 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Projections.js +231 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Records.js +326 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Scanner.js +624 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Sources.js +201 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Throughput.js +456 -0
- package/source/services/web-app/pict-app-full/Pict-Application-Facto-Full-Configuration.json +14 -0
- package/source/services/web-app/pict-app-full/Pict-Application-Facto-Full.js +391 -0
- package/source/services/web-app/pict-app-full/providers/PictRouter-Facto-Configuration.json +56 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-BottomBar.js +68 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Connections.js +340 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Dashboard.js +149 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Dashboards.js +819 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Datasets.js +178 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-IngestJobs.js +99 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Layout.js +62 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-MappingEditor.js +158 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-ProjectionDetail.js +1120 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Projections.js +172 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-QueryPanel.js +119 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-RecordViewer.js +663 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Records.js +648 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Scanner.js +1017 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaDetail.js +1404 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaDocEditor.js +1036 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaEditor.js +636 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaResearch.js +357 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceDetail.js +822 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceEditor.js +1036 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceResearch.js +487 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Sources.js +165 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Throughput.js +439 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-TopBar.js +335 -0
- package/source/services/web-app/pict-app-full/views/projections/Facto-Projections-Constants.js +71 -0
- package/source/services/web-app/web/chart.min.js +20 -0
- package/source/services/web-app/web/codemirror-bundle.js +30099 -0
- package/source/services/web-app/web/css/facto-themes.css +467 -0
- package/source/services/web-app/web/css/facto.css +502 -0
- package/source/services/web-app/web/index.html +28 -0
- package/source/services/web-app/web/retold-facto.js +12138 -0
- package/source/services/web-app/web/retold-facto.js.map +1 -0
- package/source/services/web-app/web/retold-facto.min.js +2 -0
- package/source/services/web-app/web/retold-facto.min.js.map +1 -0
- package/source/services/web-app/web/simple/index.html +17 -0
- package/test/Facto_Browser_Integration_tests.js +798 -0
- package/test/RetoldFacto_tests.js +4117 -0
- package/test/fixtures/weather-readings.csv +17 -0
- package/test/fixtures/weather-stations.csv +9 -0
- package/test/model/MeadowModel-Extended.json +8497 -0
- package/test/model/MeadowModel-PICT.json +1 -0
- package/test/model/MeadowModel.json +1355 -0
- package/test/model/ddl/Facto.ddl +225 -0
- package/test/model/fable-configuration.json +14 -0
package/package.json
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "retold-facto",
|
|
3
|
+
"version": "0.0.4",
|
|
4
|
+
"description": "Data warehouse and knowledge graph storage for the Retold ecosystem.",
|
|
5
|
+
"main": "source/Retold-Facto.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"retold-facto": "bin/retold-facto.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"start": "node bin/retold-facto.js",
|
|
11
|
+
"coverage": "npx quack coverage",
|
|
12
|
+
"test": "npx quack test",
|
|
13
|
+
"test-browser": "npx mocha test/Facto_Browser_Integration_tests.js -u tdd --exit --timeout 120000",
|
|
14
|
+
"build": "npx quack build",
|
|
15
|
+
"build-codemirror": "npx esbuild source/services/web-app/codemirror-entry.js --bundle --outfile=source/services/web-app/web/codemirror-bundle.js --format=iife --global-name=CodeMirrorModules --platform=browser --target=es2018",
|
|
16
|
+
"build-test-model": "cd test && npx stricture -i model/ddl/Facto.ddl",
|
|
17
|
+
"docker-build": "docker build -t retold-facto .",
|
|
18
|
+
"docker-run": "docker run -p 8386:8386 retold-facto"
|
|
19
|
+
},
|
|
20
|
+
"mocha": {
|
|
21
|
+
"spec": "test/RetoldFacto_tests.js",
|
|
22
|
+
"diff": true,
|
|
23
|
+
"extension": [
|
|
24
|
+
"js"
|
|
25
|
+
],
|
|
26
|
+
"package": "./package.json",
|
|
27
|
+
"reporter": "spec",
|
|
28
|
+
"slow": "75",
|
|
29
|
+
"timeout": "5000",
|
|
30
|
+
"ui": "tdd",
|
|
31
|
+
"watch-files": [
|
|
32
|
+
"source/**/*.js",
|
|
33
|
+
"test/**/*.js"
|
|
34
|
+
],
|
|
35
|
+
"watch-ignore": [
|
|
36
|
+
"lib/vendor"
|
|
37
|
+
]
|
|
38
|
+
},
|
|
39
|
+
"repository": {
|
|
40
|
+
"type": "git",
|
|
41
|
+
"url": "https://github.com/stevenvelozo/retold-facto.git"
|
|
42
|
+
},
|
|
43
|
+
"keywords": [
|
|
44
|
+
"data",
|
|
45
|
+
"warehouse",
|
|
46
|
+
"knowledge-graph",
|
|
47
|
+
"certainty",
|
|
48
|
+
"ingest"
|
|
49
|
+
],
|
|
50
|
+
"author": "Steven Velozo <steven@velozo.com> (http://velozo.com/)",
|
|
51
|
+
"license": "MIT",
|
|
52
|
+
"bugs": {
|
|
53
|
+
"url": "https://github.com/stevenvelozo/retold-facto/issues"
|
|
54
|
+
},
|
|
55
|
+
"homepage": "https://github.com/stevenvelozo/retold-facto",
|
|
56
|
+
"devDependencies": {
|
|
57
|
+
"chai": "^4.5.0",
|
|
58
|
+
"meadow-connection-sqlite": "^1.0.18",
|
|
59
|
+
"puppeteer": "^24.8.0",
|
|
60
|
+
"quackage": "^1.0.65",
|
|
61
|
+
"stricture": "^4.0.2",
|
|
62
|
+
"supertest": "^7.2.2"
|
|
63
|
+
},
|
|
64
|
+
"dependencies": {
|
|
65
|
+
"@codemirror/lang-markdown": "^6.5.0",
|
|
66
|
+
"@codemirror/state": "^6.6.0",
|
|
67
|
+
"@codemirror/view": "^6.40.0",
|
|
68
|
+
"bibliograph": "^0.1.4",
|
|
69
|
+
"codemirror": "^6.0.2",
|
|
70
|
+
"fable": "^3.1.67",
|
|
71
|
+
"fable-serviceproviderbase": "^3.0.19",
|
|
72
|
+
"fast-xml-parser": "^5.5.8",
|
|
73
|
+
"meadow": "^2.0.33",
|
|
74
|
+
"meadow-connection-manager": "^1.0.0",
|
|
75
|
+
"meadow-connection-mysql": "^1.0.14",
|
|
76
|
+
"meadow-endpoints": "^4.0.14",
|
|
77
|
+
"meadow-integration": "^1.0.21",
|
|
78
|
+
"orator": "^6.0.4",
|
|
79
|
+
"orator-serviceserver-restify": "^2.0.9",
|
|
80
|
+
"orator-static-server": "^2.0.4",
|
|
81
|
+
"pict": "^1.0.359",
|
|
82
|
+
"pict-router": "^1.0.9",
|
|
83
|
+
"pict-section-flow": "^0.0.17",
|
|
84
|
+
"pict-section-modal": "^0.0.1",
|
|
85
|
+
"pict-section-histogram": "^1.0.0",
|
|
86
|
+
"pict-section-markdowneditor": "^1.0.7",
|
|
87
|
+
"pict-section-objecteditor": "^1.0.1",
|
|
88
|
+
"stricture": "^4.0.2",
|
|
89
|
+
"xlsx": "^0.18.5"
|
|
90
|
+
}
|
|
91
|
+
}
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Retold Facto — Offline Data Lake Downloader (CLI)
|
|
4
|
+
*
|
|
5
|
+
* Thin CLI wrapper around RetoldFactoDataLakeService.
|
|
6
|
+
* Reads download-catalog.json and downloads public datasets into a
|
|
7
|
+
* well-organized folder structure for stable, repeatable ingestion.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* node scripts/facto-download.js --dry-run
|
|
11
|
+
* node scripts/facto-download.js --dry-run --tier 2
|
|
12
|
+
* node scripts/facto-download.js --id iso-4217-currencies
|
|
13
|
+
* node scripts/facto-download.js --list
|
|
14
|
+
* node scripts/facto-download.js --category 01_foundational_reference
|
|
15
|
+
* node scripts/facto-download.js --tier 2 --force
|
|
16
|
+
*
|
|
17
|
+
* @author Steven Velozo <steven@velozo.com>
|
|
18
|
+
*/
|
|
19
|
+
const libFable = require('fable');
|
|
20
|
+
const libPath = require('path');
|
|
21
|
+
|
|
22
|
+
const libRetoldFactoDataLakeService = require('../source/services/Retold-Facto-DataLakeService.js');
|
|
23
|
+
|
|
24
|
+
// ================================================================
|
|
25
|
+
// Constants
|
|
26
|
+
// ================================================================
|
|
27
|
+
|
|
28
|
+
// Catalog file listing the datasets, resolved relative to this script's directory.
const CATALOG_PATH = libPath.join(__dirname, '..', 'documentation', 'datasets', 'download-catalog.json');
// Data directory used when --data-dir / -d is not supplied.
const DEFAULT_DATA_DIR = libPath.join(__dirname, '..', 'dist', 'retold-facto-raw-data');

// ================================================================
// CLI Argument Parsing
// ================================================================

// Parsed command-line options. main() reads these to choose the mode
// (list / dry-run / download) and to build the catalog filters.
let _Options = (
	{
		dryRun: false,
		list: false,
		force: false,
		tier: null,
		category: null,
		id: null,
		dataDir: DEFAULT_DATA_DIR
	});

let tmpArgs = process.argv.slice(2);
for (let i = 0; i < tmpArgs.length; i++)
{
	if (tmpArgs[i] === '--dry-run' || tmpArgs[i] === '-n')
	{
		_Options.dryRun = true;
	}
	else if (tmpArgs[i] === '--list' || tmpArgs[i] === '-l')
	{
		_Options.list = true;
	}
	else if (tmpArgs[i] === '--force' || tmpArgs[i] === '-f')
	{
		_Options.force = true;
	}
	else if ((tmpArgs[i] === '--tier' || tmpArgs[i] === '-t') && tmpArgs[i + 1])
	{
		// Reject non-numeric tiers up front; otherwise NaN would be passed on as the filter.
		let tmpTier = parseInt(tmpArgs[i + 1], 10);
		if (Number.isNaN(tmpTier))
		{
			console.error(`Invalid tier: ${tmpArgs[i + 1]} (expected an integer)`);
			console.error('Use --help for usage information.');
			process.exit(1);
		}
		_Options.tier = tmpTier;
		i++;
	}
	else if ((tmpArgs[i] === '--category' || tmpArgs[i] === '-c') && tmpArgs[i + 1])
	{
		_Options.category = tmpArgs[i + 1];
		i++;
	}
	else if ((tmpArgs[i] === '--id') && tmpArgs[i + 1])
	{
		_Options.id = tmpArgs[i + 1];
		i++;
	}
	else if ((tmpArgs[i] === '--data-dir' || tmpArgs[i] === '-d') && tmpArgs[i + 1])
	{
		_Options.dataDir = libPath.resolve(tmpArgs[i + 1]);
		i++;
	}
	else if (tmpArgs[i] === '--skip-api')
	{
		// Default behavior — flag exists for explicitness
	}
	else if (tmpArgs[i] === '--help' || tmpArgs[i] === '-h')
	{
		console.log('Retold Facto — Offline Data Lake Downloader');
		console.log('');
		console.log('Usage:');
		console.log(' node scripts/facto-download.js [options]');
		console.log('');
		console.log('Options:');
		console.log(' --dry-run, -n Show what would be downloaded without downloading');
		console.log(' --list, -l List all datasets with cached/missing status');
		console.log(' --tier, -t <n> Only datasets at tier N or lower');
		console.log(' --category, -c <name> Only datasets in this category');
		console.log(' --id <id> Download a single dataset by ID');
		console.log(' --data-dir, -d <path> Override data directory (default: dist/retold-facto-raw-data/)');
		console.log(' --force, -f Re-download even if already cached');
		console.log(' --skip-api Skip API-only datasets (default behavior)');
		console.log(' --help, -h Show this help');
		console.log('');
		console.log('Categories:');
		console.log(' 01_foundational_reference');
		console.log(' 02_geographic_location');
		console.log(' 03_people_cultural_entities');
		console.log(' 04_business_industry');
		console.log(' 05_media_entertainment');
		console.log('');
		console.log('Examples:');
		console.log(' node scripts/facto-download.js --dry-run');
		console.log(' node scripts/facto-download.js --dry-run --tier 1');
		console.log(' node scripts/facto-download.js --id iso-4217-currencies');
		console.log(' node scripts/facto-download.js --tier 2');
		console.log(' node scripts/facto-download.js --list');
		process.exit(0);
	}
	else
	{
		// A value option lands here only when its value is missing; say so
		// instead of reporting a recognized flag as unknown.
		let tmpValueOptions = ['--tier', '-t', '--category', '-c', '--id', '--data-dir', '-d'];
		if (tmpValueOptions.indexOf(tmpArgs[i]) !== -1)
		{
			console.error(`Missing value for argument: ${tmpArgs[i]}`);
		}
		else
		{
			console.error(`Unknown argument: ${tmpArgs[i]}`);
		}
		console.error('Use --help for usage information.');
		process.exit(1);
	}
}
|
|
125
|
+
|
|
126
|
+
// ================================================================
|
|
127
|
+
// Reporting Helpers
|
|
128
|
+
// ================================================================
|
|
129
|
+
|
|
130
|
+
/**
 * Print one status row for a dataset to stdout.
 *
 * @param {object} pService - Object exposing isDownloadable(dataset); a falsy
 *        result adds the "(manual)" suffix to the method and, for a 'missing'
 *        status, selects the "-" icon.
 * @param {object} pEntry - Catalog entry; pEntry.dataset supplies id, method,
 *        tier and size_compressed.
 * @param {string} pStatus - Status label. 'cached' and 'error' select a check
 *        or cross icon; the label itself is printed as the last column.
 */
function printStatusLine(pService, pEntry, pStatus)
{
	let tmpDataset = pEntry.dataset;
	let tmpDownloadable = pService.isDownloadable(tmpDataset);
	// String() keeps padEnd() below from throwing when the catalog holds a non-string method.
	let tmpMethodLabel = tmpDownloadable ? String(tmpDataset.method) : `${tmpDataset.method} (manual)`;

	// Every icon is two characters wide so the columns after it stay aligned.
	let tmpStatusIcon = '  ';
	if (pStatus === 'cached')
	{
		tmpStatusIcon = '\u2713 ';
	}
	else if (pStatus === 'error')
	{
		tmpStatusIcon = '\u2717 ';
	}
	else if (pStatus === 'missing' && !tmpDownloadable)
	{
		tmpStatusIcon = '- ';
	}

	// Catalog values come from JSON and may be numbers; coerce before padding.
	let tmpIdStr = String(tmpDataset.id);
	let tmpSizeStr = String(tmpDataset.size_compressed || '???');
	let tmpTierStr = `T${tmpDataset.tier}`;

	console.log(` ${tmpStatusIcon}[${tmpTierStr}] ${tmpIdStr.padEnd(35)} ${tmpSizeStr.padEnd(15)} ${tmpMethodLabel.padEnd(16)} ${pStatus}`);
}
|
|
155
|
+
|
|
156
|
+
// ================================================================
|
|
157
|
+
// Main
|
|
158
|
+
// ================================================================
|
|
159
|
+
|
|
160
|
+
/**
 * Print the category heading when an entry starts a new category.
 *
 * @param {object} pEntry - Catalog entry carrying categoryFolder.
 * @param {string} pCurrentCategory - Category of the previously printed entry.
 * @returns {string} The category that is now current.
 */
function printCategoryHeading(pEntry, pCurrentCategory)
{
	if (pEntry.categoryFolder !== pCurrentCategory)
	{
		console.log(`\n ${pEntry.categoryFolder}/`);
	}
	return pEntry.categoryFolder;
}

/**
 * --list mode: print the status of every matched dataset plus totals.
 *
 * @param {object} pService - Data lake service (getDatasetStatus, getDatasetDir,
 *        readManifest, isDownloadable, formatSize).
 * @param {Array<object>} pEntries - Flattened catalog entries to report on.
 * @param {object} pOptions - Parsed CLI options; dataDir is shown in the header.
 */
function reportDataLakeStatus(pService, pEntries, pOptions)
{
	console.log('');
	console.log('Retold Facto \u2014 Data Lake Status');
	console.log(`Data directory: ${pOptions.dataDir}`);
	console.log('');

	let tmpCurrentCategory = '';
	let tmpCachedCount = 0;
	let tmpMissingCount = 0;
	let tmpErrorCount = 0;
	let tmpManualCount = 0;
	let tmpCachedSize = 0;

	for (let i = 0; i < pEntries.length; i++)
	{
		let tmpEntry = pEntries[i];
		tmpCurrentCategory = printCategoryHeading(tmpEntry, tmpCurrentCategory);

		let tmpStatus = pService.getDatasetStatus(tmpEntry);
		printStatusLine(pService, tmpEntry, tmpStatus);

		if (tmpStatus === 'cached')
		{
			tmpCachedCount++;
			// The on-disk total comes from each cached dataset's manifest.
			let tmpDatasetDir = pService.getDatasetDir(tmpEntry.categoryFolder, tmpEntry.dataset.id);
			let tmpManifest = pService.readManifest(tmpDatasetDir);
			if (tmpManifest)
			{
				tmpCachedSize += tmpManifest.total_size || 0;
			}
		}
		else if (tmpStatus === 'error')
		{
			tmpErrorCount++;
		}
		else if (!pService.isDownloadable(tmpEntry.dataset))
		{
			tmpManualCount++;
		}
		else
		{
			tmpMissingCount++;
		}
	}

	console.log('');
	console.log(`Total: ${pEntries.length} datasets`);
	console.log(` Cached: ${tmpCachedCount} (${pService.formatSize(tmpCachedSize)} on disk)`);
	console.log(` Missing: ${tmpMissingCount} (downloadable)`);
	console.log(` API/Manual: ${tmpManualCount} (need custom scripts)`);
	if (tmpErrorCount > 0)
	{
		console.log(` Errors: ${tmpErrorCount}`);
	}
	console.log('');
}

/**
 * --dry-run mode: show what a download run would fetch, without fetching.
 *
 * @param {object} pService - Data lake service (isDownloadable, getDatasetStatus,
 *        parseSize, formatSize).
 * @param {Array<object>} pEntries - Flattened catalog entries to evaluate.
 * @param {object} pOptions - Parsed CLI options (tier, category, id, force).
 */
function reportDryRun(pService, pEntries, pOptions)
{
	console.log('');
	console.log('Retold Facto \u2014 Download Dry Run');
	if (pOptions.tier !== null)
	{
		console.log(`Tier filter: <= ${pOptions.tier}`);
	}
	if (pOptions.category)
	{
		console.log(`Category filter: ${pOptions.category}`);
	}
	if (pOptions.id)
	{
		console.log(`Dataset filter: ${pOptions.id}`);
	}
	console.log('');

	let tmpCurrentCategory = '';
	let tmpTotalSize = 0;
	let tmpDownloadableCount = 0;
	let tmpSkippedCount = 0;
	let tmpAlreadyCachedCount = 0;

	for (let i = 0; i < pEntries.length; i++)
	{
		let tmpEntry = pEntries[i];
		let tmpDataset = tmpEntry.dataset;
		tmpCurrentCategory = printCategoryHeading(tmpEntry, tmpCurrentCategory);

		let tmpDownloadable = pService.isDownloadable(tmpDataset);
		let tmpStatus = pService.getDatasetStatus(tmpEntry);
		let tmpSizeBytes = pService.parseSize(tmpDataset.size_compressed);
		// Catalog values come from JSON and may be numbers; coerce before padding.
		let tmpIdStr = String(tmpDataset.id);
		let tmpSizeStr = String(tmpDataset.size_compressed || '???');

		if (!tmpDownloadable)
		{
			console.log(` SKIP ${tmpIdStr.padEnd(35)} ${tmpSizeStr.padEnd(15)} ${tmpDataset.method} (needs custom script)`);
			tmpSkippedCount++;
		}
		else if (tmpStatus === 'cached' && !pOptions.force)
		{
			console.log(` OK ${tmpIdStr.padEnd(35)} ${tmpSizeStr.padEnd(15)} already cached`);
			tmpAlreadyCachedCount++;
		}
		else
		{
			// rest_api datasets are described by fetch steps; everything else by URLs.
			let tmpUrlCount = (tmpDataset.method === 'rest_api')
				? `${(tmpDataset.fetch_steps || []).length} step(s)`
				: `${(tmpDataset.urls || []).length} URL(s)`;
			console.log(` GET ${tmpIdStr.padEnd(35)} ${tmpSizeStr.padEnd(15)} ${tmpDataset.method} \u2192 ${tmpUrlCount}`);
			tmpTotalSize += tmpSizeBytes;
			tmpDownloadableCount++;
		}
	}

	console.log('');
	console.log('Summary:');
	console.log(` Would download: ${tmpDownloadableCount} datasets (~${pService.formatSize(tmpTotalSize)})`);
	console.log(` Already cached: ${tmpAlreadyCachedCount}`);
	console.log(` Skipped (API): ${tmpSkippedCount}`);
	console.log(` Total matched: ${pEntries.length}`);
	console.log('');
	console.log('Run without --dry-run to download.');
	console.log('');
}

/**
 * Download mode: fetch every matched dataset that is downloadable and not
 * already cached (or all downloadable ones when force is set), sequentially.
 *
 * A failure on one dataset is counted and reported, and the remaining
 * datasets are still attempted. When any dataset fails, process.exitCode is
 * set to 1 so callers can detect the partial failure.
 *
 * @param {object} pService - Data lake service (isDownloadable, getDatasetStatus,
 *        downloadDataset, formatSize).
 * @param {Array<object>} pEntries - Flattened catalog entries to process.
 * @param {object} pOptions - Parsed CLI options (dataDir, force).
 * @returns {Promise<void>} Settles once every download has been attempted.
 */
async function downloadEntries(pService, pEntries, pOptions)
{
	console.log('');
	console.log('Retold Facto \u2014 Downloading to Offline Data Lake');
	console.log(`Data directory: ${pOptions.dataDir}`);
	console.log('');

	let tmpDownloadable = [];
	let tmpSkipped = [];

	for (let i = 0; i < pEntries.length; i++)
	{
		let tmpEntry = pEntries[i];
		let tmpDataset = tmpEntry.dataset;

		if (!pService.isDownloadable(tmpDataset))
		{
			tmpSkipped.push(tmpEntry);
			continue;
		}

		let tmpStatus = pService.getDatasetStatus(tmpEntry);
		if (tmpStatus === 'cached' && !pOptions.force)
		{
			console.log(` [CACHED] ${tmpDataset.id} \u2014 already downloaded`);
			continue;
		}

		tmpDownloadable.push(tmpEntry);
	}

	if (tmpDownloadable.length === 0)
	{
		console.log('Nothing to download. All matching datasets are cached or API-only.');
		if (tmpSkipped.length > 0)
		{
			console.log(`(${tmpSkipped.length} API/manual datasets were skipped)`);
		}
		console.log('');
		return;
	}

	console.log(`Downloading ${tmpDownloadable.length} dataset(s)...`);
	console.log('');

	let tmpSuccessCount = 0;
	let tmpErrorCount = 0;

	for (let i = 0; i < tmpDownloadable.length; i++)
	{
		let tmpEntry = tmpDownloadable[i];
		let tmpDataset = tmpEntry.dataset;

		console.log(`[${i + 1}/${tmpDownloadable.length}] ${tmpDataset.name} (${tmpDataset.id})`);
		console.log(` Method: ${tmpDataset.method} | Size: ${tmpDataset.size_compressed || '???'} | Tier: ${tmpDataset.tier}`);

		// Isolate each download so one rejection does not abort the rest of the batch.
		let tmpManifest = null;
		try
		{
			tmpManifest = await pService.downloadDataset(tmpEntry);
		}
		catch (pError)
		{
			console.error(` Error: ${(pError && pError.message) ? pError.message : String(pError)}`);
		}

		if (tmpManifest && tmpManifest.status === 'complete')
		{
			console.log(` Complete: ${pService.formatSize(tmpManifest.total_size)} in ${(tmpManifest.files || []).length} file(s)`);
			tmpSuccessCount++;
		}
		else
		{
			tmpErrorCount++;
		}
		console.log('');
	}

	console.log('');
	console.log('Download complete.');
	console.log(` Success: ${tmpSuccessCount}`);
	if (tmpErrorCount > 0)
	{
		console.log(` Errors: ${tmpErrorCount}`);
		// Report the partial failure through the exit status without cutting output short.
		process.exitCode = 1;
	}
	if (tmpSkipped.length > 0)
	{
		console.log(` Skipped: ${tmpSkipped.length} (API/manual \u2014 need custom scripts)`);
	}
	console.log('');
}

/**
 * CLI entry point.
 *
 * Builds a Fable instance, registers and instantiates the DataLake service
 * with the catalog path and data directory, loads and filters the catalog,
 * then runs exactly one mode: --list, --dry-run, or download. --list takes
 * precedence over --dry-run when both are given.
 *
 * Exits with code 1 when the catalog cannot be loaded.
 *
 * @returns {Promise<void>} Settles when the selected mode has finished.
 */
async function main()
{
	// Create a lightweight Fable instance for the service
	let tmpFable = libFable.new(
		{
			Product: 'RetoldFacto',
			ProductVersion: '0.0.1',
			LogLevel: 5
		});

	// Register and instantiate the DataLake service
	tmpFable.serviceManager.addServiceType('RetoldFactoDataLakeService', libRetoldFactoDataLakeService);
	let tmpService = tmpFable.serviceManager.instantiateServiceProvider('RetoldFactoDataLakeService',
		{
			CatalogPath: CATALOG_PATH,
			DataDir: _Options.dataDir
		});

	let tmpCatalog = tmpService.loadCatalog();
	if (!tmpCatalog)
	{
		console.error('Failed to load catalog.');
		process.exit(1);
	}

	let tmpFilters = (
		{
			tier: _Options.tier,
			category: _Options.category,
			id: _Options.id
		});

	let tmpEntries = tmpService.flattenCatalog(tmpCatalog, tmpFilters);

	if (tmpEntries.length === 0)
	{
		console.log('No datasets match the given filters.');
		return;
	}

	// --list mode
	if (_Options.list)
	{
		reportDataLakeStatus(tmpService, tmpEntries, _Options);
		return;
	}

	// --dry-run mode
	if (_Options.dryRun)
	{
		reportDryRun(tmpService, tmpEntries, _Options);
		return;
	}

	// Download mode
	await downloadEntries(tmpService, tmpEntries, _Options);
}
|
|
419
|
+
|
|
420
|
+
// Run the CLI. Any rejection that escapes main() is reported on stderr and
// turned into exit code 1.
main().catch(
	(pError) =>
	{
		// A rejection reason is not guaranteed to be an Error (it may be a string
		// or null), so fall back to the raw value rather than print "undefined"
		// or throw inside this handler.
		let tmpMessage = (pError && pError.message) ? pError.message : String(pError);
		console.error(`Fatal error: ${tmpMessage}`);
		process.exit(1);
	});
|