retold-facto 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/launch.json +11 -0
- package/.dockerignore +8 -0
- package/.quackage.json +19 -0
- package/Dockerfile +26 -0
- package/bin/retold-facto.js +909 -0
- package/examples/facto-government-data.sqlite +0 -0
- package/examples/government-data-catalog.json +137 -0
- package/examples/government-data-loader.js +1432 -0
- package/package.json +91 -0
- package/scripts/facto-download.js +425 -0
- package/source/Retold-Facto.js +1042 -0
- package/source/services/Retold-Facto-BeaconProvider.js +511 -0
- package/source/services/Retold-Facto-CatalogManager.js +1252 -0
- package/source/services/Retold-Facto-DataLakeService.js +1642 -0
- package/source/services/Retold-Facto-DatasetManager.js +417 -0
- package/source/services/Retold-Facto-IngestEngine.js +1315 -0
- package/source/services/Retold-Facto-ProjectionEngine.js +3960 -0
- package/source/services/Retold-Facto-RecordManager.js +360 -0
- package/source/services/Retold-Facto-SchemaManager.js +1110 -0
- package/source/services/Retold-Facto-SourceFolderScanner.js +2243 -0
- package/source/services/Retold-Facto-SourceManager.js +730 -0
- package/source/services/Retold-Facto-StoreConnectionManager.js +441 -0
- package/source/services/Retold-Facto-ThroughputMonitor.js +478 -0
- package/source/services/web-app/codemirror-entry.js +7 -0
- package/source/services/web-app/pict-app/Pict-Application-Facto-Configuration.json +9 -0
- package/source/services/web-app/pict-app/Pict-Application-Facto.js +70 -0
- package/source/services/web-app/pict-app/Pict-Facto-Bundle.js +11 -0
- package/source/services/web-app/pict-app/providers/Pict-Provider-Facto-UI.js +66 -0
- package/source/services/web-app/pict-app/providers/Pict-Provider-Facto.js +69 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Catalog.js +93 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Connections.js +42 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Datasets.js +605 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Projections.js +188 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Scanner.js +80 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Schema.js +116 -0
- package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Sources.js +104 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Catalog.js +526 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Datasets.js +173 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Ingest.js +259 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Layout.js +191 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Projections.js +231 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Records.js +326 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Scanner.js +624 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Sources.js +201 -0
- package/source/services/web-app/pict-app/views/PictView-Facto-Throughput.js +456 -0
- package/source/services/web-app/pict-app-full/Pict-Application-Facto-Full-Configuration.json +14 -0
- package/source/services/web-app/pict-app-full/Pict-Application-Facto-Full.js +391 -0
- package/source/services/web-app/pict-app-full/providers/PictRouter-Facto-Configuration.json +56 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-BottomBar.js +68 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Connections.js +340 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Dashboard.js +149 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Dashboards.js +819 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Datasets.js +178 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-IngestJobs.js +99 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Layout.js +62 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-MappingEditor.js +158 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-ProjectionDetail.js +1120 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Projections.js +172 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-QueryPanel.js +119 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-RecordViewer.js +663 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Records.js +648 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Scanner.js +1017 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaDetail.js +1404 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaDocEditor.js +1036 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaEditor.js +636 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaResearch.js +357 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceDetail.js +822 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceEditor.js +1036 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceResearch.js +487 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Sources.js +165 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Throughput.js +439 -0
- package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-TopBar.js +335 -0
- package/source/services/web-app/pict-app-full/views/projections/Facto-Projections-Constants.js +71 -0
- package/source/services/web-app/web/chart.min.js +20 -0
- package/source/services/web-app/web/codemirror-bundle.js +30099 -0
- package/source/services/web-app/web/css/facto-themes.css +467 -0
- package/source/services/web-app/web/css/facto.css +502 -0
- package/source/services/web-app/web/index.html +28 -0
- package/source/services/web-app/web/retold-facto.js +12138 -0
- package/source/services/web-app/web/retold-facto.js.map +1 -0
- package/source/services/web-app/web/retold-facto.min.js +2 -0
- package/source/services/web-app/web/retold-facto.min.js.map +1 -0
- package/source/services/web-app/web/simple/index.html +17 -0
- package/test/Facto_Browser_Integration_tests.js +798 -0
- package/test/RetoldFacto_tests.js +4117 -0
- package/test/fixtures/weather-readings.csv +17 -0
- package/test/fixtures/weather-stations.csv +9 -0
- package/test/model/MeadowModel-Extended.json +8497 -0
- package/test/model/MeadowModel-PICT.json +1 -0
- package/test/model/MeadowModel.json +1355 -0
- package/test/model/ddl/Facto.ddl +225 -0
- package/test/model/fable-configuration.json +14 -0
|
@@ -0,0 +1,1432 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Retold Facto — Government Data Loader Example
|
|
4
|
+
*
|
|
5
|
+
* Downloads 6 real US government datasets spanning all supported
|
|
6
|
+
* ingest formats (CSV, JSON, XML, TSV, Excel, Fixed-Width), creates
|
|
7
|
+
* Source/Dataset/SourceDocumentation records, and ingests the data.
|
|
8
|
+
*
|
|
9
|
+
* Supports two modes:
|
|
10
|
+
* --port <port> HTTP mode: exercises the Facto REST API against a running server
|
|
11
|
+
* --db <path> Direct mode: creates a standalone SQLite database (default)
|
|
12
|
+
*
|
|
13
|
+
* Usage:
|
|
14
|
+
* node examples/government-data-loader.js --port 8386
|
|
15
|
+
* node examples/government-data-loader.js --db examples/facto-government-data.sqlite
|
|
16
|
+
*
|
|
17
|
+
* @author Steven Velozo <steven@velozo.com>
|
|
18
|
+
*/
|
|
19
|
+
const libFs = require('fs');
|
|
20
|
+
const libPath = require('path');
|
|
21
|
+
const libHttp = require('http');
|
|
22
|
+
const libHttps = require('https');
|
|
23
|
+
|
|
24
|
+
// ================================================================
// Configuration
// ================================================================

// Default database path for direct (SQLite) mode; overridden by --db.
let _DBPath = libPath.join(__dirname, 'facto-government-data.sqlite');
// When set (via --port), the script runs in HTTP mode against a running server.
let _CLIPort = null;

// Parse CLI flags
let tmpArgs = process.argv.slice(2);
for (let i = 0; i < tmpArgs.length; i++)
{
	if ((tmpArgs[i] === '--db' || tmpArgs[i] === '-d') && tmpArgs[i + 1])
	{
		_DBPath = libPath.resolve(tmpArgs[i + 1]);
		i++; // consume the flag's value argument
	}
	else if ((tmpArgs[i] === '--port' || tmpArgs[i] === '-p') && tmpArgs[i + 1])
	{
		_CLIPort = parseInt(tmpArgs[i + 1], 10);
		// Fail fast on a non-numeric port instead of silently falling back
		// to direct mode (parseInt yields NaN, which is falsy and would be
		// ignored by the bootstrap mode check below).
		if (Number.isNaN(_CLIPort))
		{
			console.error(`Invalid value for --port: ${tmpArgs[i + 1]}`);
			process.exit(1);
		}
		i++; // consume the flag's value argument
	}
	else if (tmpArgs[i] === '--help' || tmpArgs[i] === '-h')
	{
		console.log('Retold Facto — Government Data Loader');
		console.log('');
		console.log('Usage:');
		console.log(' node examples/government-data-loader.js [options]');
		console.log('');
		console.log('Options:');
		console.log(' --port, -p <port> HTTP mode: send data to a running Facto server');
		console.log(' --db, -d <path> Direct mode: create a standalone SQLite database');
		console.log(' (default: examples/facto-government-data.sqlite)');
		console.log(' --help, -h Show this help message');
		console.log('');
		console.log('Examples:');
		console.log(' node examples/government-data-loader.js --port 8386');
		console.log(' node examples/government-data-loader.js --db /tmp/test.sqlite');
		process.exit(0);
	}
}
|
|
64
|
+
|
|
65
|
+
// ================================================================
|
|
66
|
+
// Dataset Definitions
|
|
67
|
+
// ================================================================
|
|
68
|
+
|
|
69
|
+
// Each entry describes one public dataset to load:
//   name/description - human-readable Dataset record fields
//   sourceType       - Source record Type ('API' or 'File')
//   sourceURL        - where downloadURL fetches the raw data
//   agency           - groups datasets under a shared Source record
//   format           - ingest format exercised (csv/json/xml/tsv/excel/fixed-width)
//   certainty        - confidence score stored with the records
//   parseOptions     - format-specific parsing hints (optional)
//   maxLines         - cap on lines ingested (optional)
//   documentation    - SourceDocumentation records attached to the Source
const GOVERNMENT_DATASETS = [
	// CSV format: USGS real-time earthquake feed
	{
		name: 'USGS Earthquakes (Past 7 Days)',
		sourceType: 'API',
		sourceURL: 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv',
		agency: 'US Geological Survey',
		description: 'All earthquakes worldwide in the past 7 days, from the USGS Earthquake Hazards Program real-time feed.',
		format: 'csv',
		certainty: 0.95,
		documentation: [
			{
				Name: 'Data Source Information',
				DocumentType: 'Overview',
				Description: 'USGS Earthquake Hazards Program provides real-time earthquake data via GeoJSON, CSV, and KML feeds.',
				Content: 'Feed URL: https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv\nUpdate Frequency: Every 5 minutes\nCoverage: Global\nFields: time, latitude, longitude, depth, mag, magType, nst, gap, dmin, rms, net, id, updated, place, type, horizontalError, depthError, magError, magNst, status, locationSource, magSource'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'USGS data is in the public domain.',
				Content: 'USGS-authored or produced data and information are in the public domain and may be used without restriction. https://www.usgs.gov/information-policies-and-instructions/copyrights-and-credits'
			}
		]
	},
	// JSON format: Treasury Fiscal Data API
	{
		name: 'Treasury Debt to the Penny',
		sourceType: 'API',
		sourceURL: 'https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v2/accounting/od/debt_to_penny?page[size]=100&format=json',
		agency: 'US Treasury Department',
		description: 'Daily US national debt figures from the Treasury Fiscal Data API, showing total public debt outstanding.',
		format: 'json',
		certainty: 0.98,
		documentation: [
			{
				Name: 'API Documentation',
				DocumentType: 'Overview',
				Description: 'Treasury Fiscal Data API provides programmatic access to federal financial data.',
				Content: 'API Base: https://api.fiscaldata.treasury.gov/\nEndpoint: /services/api/fiscal_service/v2/accounting/od/debt_to_penny\nFormat: JSON (default), CSV, XML\nFields: record_date, debt_held_public_amt, intragov_hold_amt, tot_pub_debt_out_amt, src_line_nbr, record_fiscal_year, record_fiscal_quarter, record_calendar_year, record_calendar_quarter, record_calendar_month, record_calendar_day'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'US Government public domain data.',
				Content: 'Data published by the US Treasury is in the public domain and freely available for reuse.'
			}
		]
	},
	// XML format: Treasury Fiscal Data API with format=xml
	{
		name: 'Treasury Exchange Rates',
		sourceType: 'API',
		sourceURL: 'https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v1/accounting/od/rates_of_exchange?page[size]=50&format=xml',
		agency: 'US Treasury Department',
		description: 'Foreign currency exchange rates published by the US Treasury, in XML format.',
		format: 'xml',
		certainty: 0.95,
		documentation: [
			{
				Name: 'API Documentation',
				DocumentType: 'Overview',
				Description: 'Treasury exchange rates used for federal government reporting of foreign currency transactions.',
				Content: 'API Base: https://api.fiscaldata.treasury.gov/\nEndpoint: /services/api/fiscal_service/v1/accounting/od/rates_of_exchange\nFormat: XML\nFields: record_date, country, currency, country_currency_desc, exchange_rate, effective_date, src_line_nbr, record_fiscal_year, record_fiscal_quarter, record_calendar_year, record_calendar_quarter, record_calendar_month, record_calendar_day'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'US Government public domain data.',
				Content: 'Exchange rates published by the Treasury are public domain data.'
			}
		]
	},
	// TSV format: USGS NWIS RDB (tab-delimited with '#' comment header lines)
	{
		name: 'USGS Streamflow - Potomac River',
		sourceType: 'API',
		sourceURL: 'https://waterservices.usgs.gov/nwis/iv/?format=rdb&sites=01646500&period=P1D&parameterCd=00060',
		agency: 'US Geological Survey',
		description: 'Real-time streamflow (discharge) data for the Potomac River at Little Falls, DC. Tab-delimited RDB format with comment headers.',
		format: 'tsv',
		certainty: 0.90,
		parseOptions: { delimiter: '\t', stripCommentLines: true },
		documentation: [
			{
				Name: 'Data Source Information',
				DocumentType: 'Overview',
				Description: 'USGS National Water Information System (NWIS) provides real-time water data from thousands of monitoring stations.',
				Content: 'Service URL: https://waterservices.usgs.gov/nwis/iv/\nFormat: RDB (tab-delimited with # comment lines)\nSite: 01646500 (Potomac River at Little Falls, Washington DC)\nParameter: 00060 (Discharge, cubic feet per second)\nPeriod: P1D (past 1 day)\nUpdate Frequency: Every 15 minutes'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'USGS data is in the public domain.',
				Content: 'All USGS data products are in the public domain.'
			}
		]
	},
	// Excel format: BLS CPI series (downloaded as JSON, converted to Excel downstream)
	{
		name: 'BLS Consumer Price Index',
		sourceType: 'API',
		sourceURL: 'https://api.bls.gov/publicAPI/v2/timeseries/data/CUUR0000SA0',
		agency: 'Bureau of Labor Statistics',
		description: 'Consumer Price Index for All Urban Consumers (CPI-U), seasonally adjusted. Downloaded as JSON, converted to Excel for format demonstration.',
		format: 'excel',
		certainty: 0.97,
		documentation: [
			{
				Name: 'API Documentation',
				DocumentType: 'Overview',
				Description: 'BLS Public Data API provides access to economic time series data.',
				Content: 'API Base: https://api.bls.gov/publicAPI/v2/\nSeries: CUUR0000SA0 (CPI for All Urban Consumers, All items, US city average, Not seasonally adjusted)\nFormat: JSON\nFields: year, period, periodName, value, footnotes'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'BLS data is in the public domain.',
				Content: 'Bureau of Labor Statistics data are in the public domain and may be reproduced without permission.'
			}
		]
	},
	// Fixed-width format: GHCN station inventory (column positions are 1-based)
	{
		name: 'NOAA Weather Station Inventory',
		sourceType: 'File',
		sourceURL: 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt',
		agency: 'National Oceanic and Atmospheric Administration',
		description: 'Global Historical Climatology Network (GHCN) daily weather station inventory. Fixed-width format containing station IDs, locations, and metadata.',
		format: 'fixed-width',
		certainty: 0.92,
		parseOptions:
		{
			columns: [
				{ name: 'ID', start: 1, width: 11 },
				{ name: 'Latitude', start: 13, width: 8 },
				{ name: 'Longitude', start: 22, width: 9 },
				{ name: 'Elevation', start: 32, width: 6 },
				{ name: 'State', start: 39, width: 2 },
				{ name: 'Name', start: 42, width: 30 },
				{ name: 'GSNFlag', start: 73, width: 3 },
				{ name: 'HCNFlag', start: 77, width: 3 },
				{ name: 'WMOId', start: 81, width: 5 }
			]
		},
		// Limit to first 500 lines to keep example manageable
		maxLines: 500,
		documentation: [
			{
				Name: 'Data Source Information',
				DocumentType: 'Overview',
				Description: 'GHCN-Daily is the official archive of daily weather observations from land-based stations worldwide.',
				Content: 'File: ghcnd-stations.txt\nFormat: Fixed-width text\nColumns: ID (11), Latitude (8), Longitude (9), Elevation (6), State (2), Name (30), GSN Flag (3), HCN/CRN Flag (3), WMO ID (5)\nCoverage: Global (~120,000+ stations)\nSource: NOAA National Centers for Environmental Information'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'NOAA data is in the public domain.',
				Content: 'NOAA data are generally in the public domain and may be used freely.'
			},
			{
				Name: 'Data Dictionary',
				DocumentType: 'DataDictionary',
				Description: 'Column definitions for the GHCN station inventory file.',
				Content: 'ID: 11-char station identifier (country code + network code + station number)\nLatitude: Decimal degrees, positive=North\nLongitude: Decimal degrees, positive=East\nElevation: Meters above sea level\nState: US state abbreviation (blank for non-US)\nName: Station name\nGSN Flag: GSN (GCOS Surface Network) indicator\nHCN Flag: HCN (US Historical Climatology Network) or CRN indicator\nWMO ID: World Meteorological Organization station number'
			}
		]
	}
];
|
|
232
|
+
|
|
233
|
+
// ================================================================
|
|
234
|
+
// HTTPS Download Helper
|
|
235
|
+
// ================================================================
|
|
236
|
+
|
|
237
|
+
/**
 * Download the full body of an HTTPS URL into a Buffer.
 *
 * Follows 3xx redirects (absolute or origin-relative Location headers),
 * capped at a fixed depth so a redirect loop cannot recurse forever.
 * A 30 second socket timeout aborts the request. The callback is
 * guarded so it fires at most once even if a timeout races a late
 * 'error' event after destroy().
 *
 * @param {string} pURL - The https URL to download.
 * @param {function} fCallback - Node-style callback (pError, pBuffer).
 * @param {number} [pRedirectDepth] - Internal: redirects already followed (defaults to 0).
 */
function downloadURL(pURL, fCallback, pRedirectDepth)
{
	let tmpRedirectDepth = (typeof pRedirectDepth === 'number') ? pRedirectDepth : 0;
	if (tmpRedirectDepth > 10)
	{
		return fCallback(new Error(`Too many redirects downloading ${pURL}`));
	}

	console.log(` Downloading: ${pURL.substring(0, 80)}...`);

	// Ensure the callback only ever fires once.
	let tmpCallbackFired = false;
	const finishOnce = (pError, pBuffer) =>
	{
		if (tmpCallbackFired)
		{
			return;
		}
		tmpCallbackFired = true;
		return fCallback(pError, pBuffer);
	};

	let tmpRequest = libHttps.get(pURL,
		(pResponse) =>
		{
			// Follow redirects
			if (pResponse.statusCode >= 300 && pResponse.statusCode < 400 && pResponse.headers.location)
			{
				let tmpRedirectURL = pResponse.headers.location;
				if (!tmpRedirectURL.startsWith('http'))
				{
					// Location was origin-relative; resolve it against the request URL
					let tmpParsedURL = new URL(pURL);
					tmpRedirectURL = tmpParsedURL.origin + tmpRedirectURL;
				}
				console.log(` Redirecting to: ${tmpRedirectURL.substring(0, 80)}...`);
				pResponse.resume(); // drain the redirect body so the socket is released
				return downloadURL(tmpRedirectURL, finishOnce, tmpRedirectDepth + 1);
			}

			if (pResponse.statusCode !== 200)
			{
				pResponse.resume(); // drain the body so the connection can close cleanly
				return finishOnce(new Error(`HTTP ${pResponse.statusCode} for ${pURL}`));
			}

			let tmpChunks = [];
			pResponse.on('data', (pChunk) => tmpChunks.push(pChunk));
			pResponse.on('end', () =>
			{
				let tmpBuffer = Buffer.concat(tmpChunks);
				return finishOnce(null, tmpBuffer);
			});
			pResponse.on('error', (pError) => finishOnce(pError));
		});

	tmpRequest.on('error', (pError) => finishOnce(pError));
	tmpRequest.setTimeout(30000, () =>
	{
		tmpRequest.destroy();
		return finishOnce(new Error(`Timeout downloading ${pURL}`));
	});
}
|
|
279
|
+
|
|
280
|
+
// ================================================================
|
|
281
|
+
// HTTP API Helpers (for HTTP mode)
|
|
282
|
+
// ================================================================
|
|
283
|
+
|
|
284
|
+
/**
 * Issue a JSON request to the local Facto server and parse the JSON response.
 *
 * Serializes pBody (if any) as JSON with a Content-Length header, reads
 * the full response, and parses it as JSON. A 60 second socket timeout
 * aborts the request. The callback is guarded to fire at most once —
 * previously a timeout could race a late 'error' event after destroy(),
 * and a throwing fCallback inside the parse try-block would be caught
 * and re-invoked with a bogus parse error.
 *
 * @param {string} pMethod - HTTP verb ('GET', 'POST', 'PUT', ...).
 * @param {number} pPort - Port of the Facto server on localhost.
 * @param {string} pPath - Request path (e.g. '/1.0/Source').
 * @param {object|null} pBody - JSON-serializable request body, or null for none.
 * @param {function} fCallback - Node-style callback (pError, pParsedResponse).
 */
function httpRequest(pMethod, pPort, pPath, pBody, fCallback)
{
	let tmpBodyString = pBody ? JSON.stringify(pBody) : null;

	// Ensure the callback fires exactly once.
	let tmpCallbackFired = false;
	const finishOnce = (pError, pResult) =>
	{
		if (tmpCallbackFired)
		{
			return;
		}
		tmpCallbackFired = true;
		return fCallback(pError, pResult);
	};

	let tmpOptions =
	{
		hostname: 'localhost',
		port: pPort,
		path: pPath,
		method: pMethod,
		headers: { 'Content-Type': 'application/json' }
	};

	if (tmpBodyString)
	{
		tmpOptions.headers['Content-Length'] = Buffer.byteLength(tmpBodyString);
	}

	let tmpRequest = libHttp.request(tmpOptions,
		(pResponse) =>
		{
			let tmpChunks = [];
			pResponse.on('data', (pChunk) => tmpChunks.push(pChunk));
			pResponse.on('end', () =>
			{
				let tmpRaw = Buffer.concat(tmpChunks).toString('utf8');
				let tmpParsed;
				try
				{
					tmpParsed = JSON.parse(tmpRaw);
				}
				catch (pParseError)
				{
					return finishOnce(new Error(`Failed to parse response from ${pPath}: ${tmpRaw.substring(0, 200)}`));
				}
				// Invoke outside the try so an exception thrown by the
				// consumer is not mistaken for a JSON parse failure.
				return finishOnce(null, tmpParsed);
			});
			pResponse.on('error', (pError) => finishOnce(pError));
		});

	tmpRequest.on('error', (pError) => finishOnce(pError));
	tmpRequest.setTimeout(60000, () =>
	{
		tmpRequest.destroy();
		return finishOnce(new Error(`Timeout on ${pMethod} ${pPath}`));
	});

	if (tmpBodyString)
	{
		tmpRequest.write(tmpBodyString);
	}
	tmpRequest.end();
}
|
|
335
|
+
|
|
336
|
+
/**
 * Convenience wrapper: POST a JSON body to the local Facto server.
 *
 * @param {number} pPort - Port of the Facto server on localhost.
 * @param {string} pPath - Request path.
 * @param {object} pBody - JSON-serializable request body.
 * @param {function} fCallback - Node-style callback (pError, pParsedResponse).
 */
function httpPost(pPort, pPath, pBody, fCallback)
{
	httpRequest('POST', pPort, pPath, pBody, fCallback);
}
|
|
340
|
+
|
|
341
|
+
/**
 * Convenience wrapper: PUT a JSON body to the local Facto server.
 *
 * @param {number} pPort - Port of the Facto server on localhost.
 * @param {string} pPath - Request path.
 * @param {object} pBody - JSON-serializable request body.
 * @param {function} fCallback - Node-style callback (pError, pParsedResponse).
 */
function httpPut(pPort, pPath, pBody, fCallback)
{
	httpRequest('PUT', pPort, pPath, pBody, fCallback);
}
|
|
345
|
+
|
|
346
|
+
/**
 * Convenience wrapper: GET from the local Facto server (no request body).
 *
 * @param {number} pPort - Port of the Facto server on localhost.
 * @param {string} pPath - Request path.
 * @param {function} fCallback - Node-style callback (pError, pParsedResponse).
 */
function httpGet(pPort, pPath, fCallback)
{
	httpRequest('GET', pPort, pPath, null, fCallback);
}
|
|
350
|
+
|
|
351
|
+
// Track created Sources by agency name to avoid duplicates
// (agency name -> IDSource for this run; see findOrCreateSource).
let _SourcesByAgency = {};

// ================================================================
// Bootstrap
// ================================================================

console.log('==========================================================');
console.log(' Retold Facto — Government Data Loader');
console.log('==========================================================');

// Mode selection: a valid --port puts us in HTTP mode (drive a running
// Facto server over REST); otherwise build a standalone SQLite database
// directly in-process.
if (_CLIPort)
{
	console.log(` Mode: HTTP (port ${_CLIPort})`);
	console.log('');

	// In HTTP mode, just start processing datasets immediately
	// (processAllDatasets is presumably a hoisted function declaration
	// defined later in this file — not visible in this chunk).
	processAllDatasets(null, _CLIPort);
}
else
{
	console.log(` Mode: Direct (SQLite)`);
	console.log(` Database: ${_DBPath}`);
	console.log('');

	// Direct mode: set up Fable + SQLite, then process datasets.
	bootstrapDirect();
}
|
|
378
|
+
|
|
379
|
+
// ================================================================
|
|
380
|
+
// Direct-mode Bootstrap (SQLite + Fable)
|
|
381
|
+
// ================================================================
|
|
382
|
+
|
|
383
|
+
/**
 * Direct-mode bootstrap: build a fresh standalone SQLite database,
 * wire up Fable + the Meadow SQLite provider + the RetoldFacto service
 * (no web server), then hand off to processAllDatasets.
 *
 * Exits the process with code 1 on any connection or initialization error.
 */
function bootstrapDirect()
{
	// Required lazily so HTTP mode does not need these packages loaded.
	const libFable = require('fable');
	const libMeadowConnectionSQLite = require('meadow-connection-sqlite');
	const libRetoldFacto = require('../source/Retold-Facto.js');

	// Remove existing database so we start fresh
	if (libFs.existsSync(_DBPath))
	{
		libFs.unlinkSync(_DBPath);
	}

	// Ensure parent directory exists
	let tmpDbDir = libPath.dirname(_DBPath);
	if (!libFs.existsSync(tmpDbDir))
	{
		libFs.mkdirSync(tmpDbDir, { recursive: true });
	}

	// Fable settings: console logging at 'warn' keeps the example output
	// readable; APIServerPort 0 because no API server is started here.
	let _Settings = (
	{
		Product: 'FactoGovLoader',
		ProductVersion: '0.0.1',
		APIServerPort: 0,
		LogStreams:
		[
			{
				streamtype: 'console',
				level: 'warn'
			}
		],
		SQLite:
		{
			SQLiteFilePath: _DBPath
		}
	});

	let _Fable = new libFable(_Settings);

	_Fable.serviceManager.addServiceType('MeadowSQLiteProvider', libMeadowConnectionSQLite);
	_Fable.serviceManager.instantiateServiceProvider('MeadowSQLiteProvider');

	_Fable.MeadowSQLiteProvider.connectAsync(
		(pConnectError) =>
		{
			if (pConnectError)
			{
				console.error(`SQLite connection error: ${pConnectError}`);
				process.exit(1);
			}

			// Tell Meadow DALs to use the SQLite provider we just connected.
			_Fable.settings.MeadowProvider = 'SQLite';

			// Create schema
			// (FACTO_SCHEMA_SQL is exported by Retold-Facto.js — presumably
			// the full Facto DDL; verify against that module.)
			_Fable.MeadowSQLiteProvider.db.exec(libRetoldFacto.FACTO_SCHEMA_SQL);
			console.log(' Schema created.');

			// Set up the Facto service (no web server needed)
			_Fable.serviceManager.addServiceType('RetoldFacto', libRetoldFacto);
			_Fable.serviceManager.instantiateServiceProvider('RetoldFacto',
				{
					StorageProvider: 'SQLite',
					AutoStartOrator: false,

					FullMeadowSchemaPath: libPath.join(__dirname, '..', 'test', 'model') + '/',
					FullMeadowSchemaFilename: 'MeadowModel-Extended.json',

					// Only the ingest machinery is needed by this loader;
					// all other endpoint groups stay off.
					Endpoints:
					{
						MeadowEndpoints: true,
						SourceManager: false,
						RecordManager: false,
						DatasetManager: false,
						IngestEngine: true,
						ProjectionEngine: false,
						WebUI: false
					}
				});

			_Fable.RetoldFacto.initializeService(
				(pInitError) =>
				{
					if (pInitError)
					{
						console.error(`Initialization error: ${pInitError}`);
						process.exit(1);
					}

					console.log(' Facto service initialized.');
					console.log('');

					// Direct mode: pass the Fable instance, no port.
					processAllDatasets(_Fable, null);
				});
		});
}
|
|
478
|
+
|
|
479
|
+
// ================================================================
|
|
480
|
+
// Agency-to-Source Mapping
|
|
481
|
+
// ================================================================
|
|
482
|
+
|
|
483
|
+
/**
 * Find or create a Source record for a given agency.
 *
 * Sources are cached per-run in _SourcesByAgency so multiple datasets
 * from the same agency share one Source. In HTTP mode the server is
 * queried by Name first (best-effort — a failed lookup is logged and
 * falls through to creation, since the previous behavior silently
 * swallowed the lookup error); in direct mode a new record is always
 * created via the Meadow DAL.
 *
 * @param {object|null} pFable - Fable instance (direct mode), or null in HTTP mode.
 * @param {number|null} pPort - Server port (HTTP mode), or null for direct mode.
 * @param {string} pAgencyName - Agency name; used as the Source record Name.
 * @param {string} pSourceType - Source Type value (e.g. 'API' or 'File').
 * @param {function} fCallback - Node-style callback (pError, pIDSource).
 */
function findOrCreateSource(pFable, pPort, pAgencyName, pSourceType, fCallback)
{
	let tmpIsHTTP = (pPort !== null);

	// Already created this agency's Source in this run?
	if (_SourcesByAgency[pAgencyName])
	{
		console.log(` Source reused: #${_SourcesByAgency[pAgencyName]} (${pAgencyName})`);
		return fCallback(null, _SourcesByAgency[pAgencyName]);
	}

	let tmpSourceData =
	{
		Name: pAgencyName,
		Type: pSourceType,
		Protocol: 'HTTPS',
		Description: `Data source: ${pAgencyName}`,
		Active: 1
	};

	if (tmpIsHTTP)
	{
		// Check if source already exists on the server by name
		// Meadow FilteredTo: /1.0/{PluralEntity}/FilteredTo/FBV~Field~EQ~Value/Begin/Cap
		httpGet(pPort, `/1.0/Sources/FilteredTo/FBV~Name~EQ~${encodeURIComponent(pAgencyName)}/0/10`,
			(pError, pResult) =>
			{
				if (pError)
				{
					// Best-effort lookup: surface the failure instead of
					// silently treating it as "no existing source", then
					// still attempt creation below.
					console.error(` Warning: source lookup failed (${pError.message}); attempting create.`);
				}
				// Meadow doReads returns an array directly
				let tmpRecords = Array.isArray(pResult) ? pResult : [];
				if (tmpRecords.length > 0)
				{
					let tmpExistingID = tmpRecords[0].IDSource;
					_SourcesByAgency[pAgencyName] = tmpExistingID;
					console.log(` Source found on server: #${tmpExistingID} (${pAgencyName})`);
					return fCallback(null, tmpExistingID);
				}

				// Create new source
				httpPost(pPort, '/1.0/Source', tmpSourceData,
					(pCreateError, pCreateResult) =>
					{
						if (pCreateError)
						{
							console.error(` Error creating source: ${pCreateError.message}`);
							return fCallback(pCreateError);
						}
						let tmpNewID = pCreateResult.IDSource;
						_SourcesByAgency[pAgencyName] = tmpNewID;
						console.log(` Source created: #${tmpNewID} (${pAgencyName})`);

						// Activate it (activation failure is non-fatal;
						// the created ID is still returned).
						httpPut(pPort, `/facto/source/${tmpNewID}/activate`, {},
							(pActivateError) =>
							{
								if (!pActivateError)
								{
									console.log(` Source #${tmpNewID} activated`);
								}
								return fCallback(null, tmpNewID);
							});
					});
			});
	}
	else
	{
		// Direct mode: create through the Meadow DAL (no lookup — the
		// in-run cache above is the only dedupe in this mode).
		let tmpQuery = pFable.DAL.Source.query.clone()
			.addRecord(tmpSourceData);

		pFable.DAL.Source.doCreate(tmpQuery,
			(pError, pQuery, pQueryRead, pRecord) =>
			{
				if (pError)
				{
					console.error(` Error creating source: ${pError}`);
					return fCallback(pError);
				}
				let tmpNewID = pRecord.IDSource;
				_SourcesByAgency[pAgencyName] = tmpNewID;
				console.log(` Source created: #${tmpNewID} (${pAgencyName})`);
				return fCallback(null, tmpNewID);
			});
	}
}
|
|
570
|
+
|
|
571
|
+
/**
 * Resolve a Dataset ID for the given catalog definition, reusing an existing
 * server-side record when one is available.
 *
 * HTTP mode (pPort set): queries the Meadow FilteredTo endpoint by Name first
 * and only POSTs a new Dataset when no match comes back.
 * Direct mode (pPort === null): always inserts through the DAL.
 */
function findOrCreateDataset(pFable, pPort, pDatasetDef, fCallback)
{
	const tmpDatasetData =
	{
		Name: pDatasetDef.name,
		Type: 'Raw',
		Description: pDatasetDef.description
	};

	if (pPort === null)
	{
		// Direct mode: create via the DAL and hand the new identity to the caller.
		const tmpCreateQuery = pFable.DAL.Dataset.query.clone().addRecord(tmpDatasetData);

		pFable.DAL.Dataset.doCreate(tmpCreateQuery,
			(pError, pQuery, pQueryRead, pRecord) =>
			{
				if (pError)
				{
					console.error(`   Error creating dataset: ${pError}`);
					return fCallback(pError);
				}
				console.log(`   Dataset created: #${pRecord.IDDataset}`);
				return fCallback(null, pRecord.IDDataset);
			});
		return;
	}

	// HTTP mode: check whether a Dataset with this name already exists.
	// Meadow FilteredTo: /1.0/{PluralEntity}/FilteredTo/FBV~Field~EQ~Value/Begin/Cap
	const tmpFilterRoute = `/1.0/Datasets/FilteredTo/FBV~Name~EQ~${encodeURIComponent(pDatasetDef.name)}/0/10`;
	httpGet(pPort, tmpFilterRoute,
		(pError, pResult) =>
		{
			const tmpMatches = Array.isArray(pResult) ? pResult : [];
			if (tmpMatches.length > 0)
			{
				const tmpExistingID = tmpMatches[0].IDDataset;
				console.log(`   Dataset found on server: #${tmpExistingID} (${pDatasetDef.name})`);
				return fCallback(null, tmpExistingID);
			}

			// Nothing matched -- create a fresh Dataset record on the server.
			httpPost(pPort, '/1.0/Dataset', tmpDatasetData,
				(pCreateError, pCreateResult) =>
				{
					if (pCreateError)
					{
						console.error(`   Error creating dataset: ${pCreateError.message}`);
						return fCallback(pCreateError);
					}
					const tmpNewID = pCreateResult.IDDataset;
					console.log(`   Dataset created: #${tmpNewID}`);
					return fCallback(null, tmpNewID);
				});
		});
}
|
|
632
|
+
|
|
633
|
+
// ================================================================
|
|
634
|
+
// Dataset Processing Entry Point
|
|
635
|
+
// ================================================================
|
|
636
|
+
|
|
637
|
+
/**
 * Entry point for the loader: walk every catalog entry sequentially,
 * then print the warehouse summary regardless of success or failure.
 */
function processAllDatasets(pFable, pPort)
{
	// Reset agency tracking for this run
	_SourcesByAgency = {};

	const fOnComplete = (pProcessError) =>
	{
		if (pProcessError)
		{
			console.error(`\nError during processing: ${pProcessError.message}`);
		}

		// Summary runs even after an error so partial progress is visible.
		printSummary(pFable, pPort);
	};

	processDataset(pFable, pPort, 0, fOnComplete);
}
|
|
653
|
+
|
|
654
|
+
// ================================================================
|
|
655
|
+
// Sequential Dataset Processing
|
|
656
|
+
// ================================================================
|
|
657
|
+
|
|
658
|
+
/**
 * Process the GOVERNMENT_DATASETS entry at pIndex through a sequential step
 * chain (source, dataset, version policy, linking, documentation, ingest,
 * stats), then recurse to the next entry. Calls fDone once every entry has
 * been processed (or fDone(pError) is never used -- steps log and continue).
 *
 * FIX: Step 5 previously hung the entire chain when a dataset definition
 * carried an EMPTY documentation array: tmpDocsRemaining started at 0, the
 * creation loop never ran, and fStep() was never invoked. The guard now also
 * skips on a zero-length array.
 */
function processDataset(pFable, pPort, pIndex, fDone)
{
	if (pIndex >= GOVERNMENT_DATASETS.length)
	{
		return fDone();
	}

	let tmpDatasetDef = GOVERNMENT_DATASETS[pIndex];
	let tmpIsHTTP = (pPort !== null);

	console.log(`----------------------------------------------------------`);
	console.log(`  [${pIndex + 1}/${GOVERNMENT_DATASETS.length}] ${tmpDatasetDef.name}`);
	console.log(`  Format: ${tmpDatasetDef.format.toUpperCase()} | Agency: ${tmpDatasetDef.agency}`);
	console.log(`  Certainty: ${tmpDatasetDef.certainty}`);
	console.log('');

	// IDs resolved by steps 1 and 2; 0 means "not resolved", which later
	// steps treat as a signal to skip their work.
	let tmpSourceID = 0;
	let tmpDatasetID = 0;

	// Step chain — each step calls the next
	let tmpSteps = [];

	// Step 1: Find or Create Source (deduped by agency name)
	tmpSteps.push(
		(fStep) =>
		{
			findOrCreateSource(pFable, pPort, tmpDatasetDef.agency, tmpDatasetDef.sourceType,
				(pError, pSourceID) =>
				{
					if (!pError && pSourceID)
					{
						tmpSourceID = pSourceID;
					}
					// Errors are intentionally non-fatal: later steps no-op
					// when tmpSourceID stays 0.
					return fStep();
				});
		});

	// Step 2: Find or Create Dataset (deduped by name in HTTP mode)
	tmpSteps.push(
		(fStep) =>
		{
			findOrCreateDataset(pFable, pPort, tmpDatasetDef,
				(pError, pDatasetID) =>
				{
					if (!pError && pDatasetID)
					{
						tmpDatasetID = pDatasetID;
					}
					return fStep();
				});
		});

	// Step 3: Set VersionPolicy on Dataset
	tmpSteps.push(
		(fStep) =>
		{
			if (!tmpDatasetID)
			{
				return fStep();
			}

			if (tmpIsHTTP)
			{
				httpPut(pPort, `/facto/dataset/${tmpDatasetID}/version-policy`,
					{ VersionPolicy: 'Append' },
					(pError) =>
					{
						if (!pError)
						{
							console.log(`  VersionPolicy set: Append`);
						}
						return fStep();
					});
			}
			else
			{
				// Direct mode: update the Dataset record
				let tmpQuery = pFable.DAL.Dataset.query.clone()
					.addRecord({ IDDataset: tmpDatasetID, VersionPolicy: 'Append' });

				pFable.DAL.Dataset.doUpdate(tmpQuery,
					(pError) =>
					{
						if (!pError)
						{
							console.log(`  VersionPolicy set: Append`);
						}
						return fStep();
					});
			}
		});

	// Step 4: Link Dataset to Source
	tmpSteps.push(
		(fStep) =>
		{
			if (!tmpSourceID || !tmpDatasetID)
			{
				return fStep();
			}

			if (tmpIsHTTP)
			{
				httpPost(pPort, `/facto/dataset/${tmpDatasetID}/source`,
					{
						IDSource: tmpSourceID,
						ReliabilityWeight: tmpDatasetDef.certainty
					},
					(pError, pResult) =>
					{
						if (pError)
						{
							console.error(`  Error linking source to dataset: ${pError.message}`);
						}
						else
						{
							console.log(`  DatasetSource linked (weight: ${tmpDatasetDef.certainty})`);
						}
						return fStep();
					});
			}
			else
			{
				let tmpQuery = pFable.DAL.DatasetSource.query.clone()
					.addRecord(
					{
						IDDataset: tmpDatasetID,
						IDSource: tmpSourceID,
						ReliabilityWeight: tmpDatasetDef.certainty
					});

				pFable.DAL.DatasetSource.doCreate(tmpQuery,
					(pError) =>
					{
						if (!pError)
						{
							console.log(`  DatasetSource linked (weight: ${tmpDatasetDef.certainty})`);
						}
						return fStep();
					});
			}
		});

	// Step 5: Create SourceDocumentation
	tmpSteps.push(
		(fStep) =>
		{
			// Also skip an EMPTY documentation array -- with zero docs the loop
			// below never runs, tmpDocDone is never called, and fStep() would
			// never fire, hanging the whole step chain.
			if (!tmpSourceID || !tmpDatasetDef.documentation || tmpDatasetDef.documentation.length === 0)
			{
				return fStep();
			}

			let tmpDocsRemaining = tmpDatasetDef.documentation.length;
			let tmpDocsCreated = 0;

			for (let d = 0; d < tmpDatasetDef.documentation.length; d++)
			{
				let tmpDoc = tmpDatasetDef.documentation[d];
				let tmpDocData =
				{
					IDSource: tmpSourceID,
					Name: tmpDoc.Name,
					DocumentType: tmpDoc.DocumentType,
					MimeType: 'text/plain',
					Description: tmpDoc.Description,
					Content: tmpDoc.Content
				};

				// Completion counter: fire fStep only after every doc settles
				// (errors count as settled -- doc creation is best-effort).
				let tmpDocDone = () =>
				{
					tmpDocsCreated++;
					if (tmpDocsCreated >= tmpDocsRemaining)
					{
						console.log(`  Documentation created: ${tmpDocsRemaining} doc(s)`);
						return fStep();
					}
				};

				if (tmpIsHTTP)
				{
					httpPost(pPort, '/1.0/SourceDocumentation', tmpDocData,
						(pError) =>
						{
							return tmpDocDone();
						});
				}
				else
				{
					let tmpQuery = pFable.DAL.SourceDocumentation.query.clone()
						.addRecord(tmpDocData);

					pFable.DAL.SourceDocumentation.doCreate(tmpQuery,
						(pError) =>
						{
							return tmpDocDone();
						});
				}
			}
		});

	// Step 6: Download and Ingest
	tmpSteps.push(
		(fStep) =>
		{
			if (!tmpSourceID || !tmpDatasetID)
			{
				console.error('  Skipping ingest: no source/dataset ID.');
				return fStep();
			}

			downloadURL(tmpDatasetDef.sourceURL,
				(pDownloadError, pBuffer) =>
				{
					if (pDownloadError)
					{
						console.error(`  Download error: ${pDownloadError.message}`);
						return fStep();
					}

					console.log(`  Downloaded: ${pBuffer.length} bytes`);

					if (tmpIsHTTP)
					{
						ingestDataHTTP(pPort, tmpDatasetDef, pBuffer, tmpDatasetID, tmpSourceID,
							(pIngestError, pResult) =>
							{
								if (pIngestError)
								{
									console.error(`  Ingest error: ${pIngestError.message}`);
								}
								else if (pResult)
								{
									console.log(`  Ingested: ${pResult.Ingested} records (${pResult.Errors} errors)`);
									if (pResult.DatasetVersion)
									{
										console.log(`  Version: ${pResult.DatasetVersion} | Duplicate: ${pResult.IsDuplicate ? 'Yes' : 'No'}`);
									}
								}
								return fStep();
							});
					}
					else
					{
						ingestDataDirect(pFable, tmpDatasetDef, pBuffer, tmpDatasetID, tmpSourceID,
							(pIngestError, pResult) =>
							{
								if (pIngestError)
								{
									console.error(`  Ingest error: ${pIngestError.message}`);
								}
								else if (pResult)
								{
									console.log(`  Ingested: ${pResult.Ingested} records (${pResult.Errors} errors)`);
								}
								return fStep();
							});
					}
				});
		});

	// Step 7: Print per-dataset stats
	tmpSteps.push(
		(fStep) =>
		{
			if (!tmpSourceID || !tmpDatasetID)
			{
				return fStep();
			}

			if (tmpIsHTTP)
			{
				httpGet(pPort, `/facto/source/${tmpSourceID}/summary`,
					(pError, pSourceSummary) =>
					{
						if (!pError && pSourceSummary)
						{
							console.log(`  Source summary: ${pSourceSummary.RecordCount} records, ${pSourceSummary.DocumentationCount} docs`);
						}
						// Inner callback renamed so it no longer shadows pError above.
						httpGet(pPort, `/facto/dataset/${tmpDatasetID}/stats`,
							(pStatsError, pDatasetStats) =>
							{
								if (!pStatsError && pDatasetStats)
								{
									console.log(`  Dataset stats: ${pDatasetStats.RecordCount} records, ${pDatasetStats.SourceCount} source(s)`);
								}
								return fStep();
							});
					});
			}
			else
			{
				// Direct mode: quick count for feedback
				let tmpQuery = pFable.DAL.Record.query.clone()
					.addFilter('IDDataset', tmpDatasetID)
					.addFilter('Deleted', 0);
				pFable.DAL.Record.doCount(tmpQuery,
					(pError, pQuery, pCount) =>
					{
						let tmpRecordCount = (typeof pCount === 'number') ? pCount : parseInt(pCount, 10) || 0;
						console.log(`  Dataset #${tmpDatasetID}: ${tmpRecordCount} records stored`);
						return fStep();
					});
			}
		});

	// Run steps sequentially, then move on to the next dataset definition.
	runStepChain(tmpSteps, 0,
		() =>
		{
			console.log('');
			processDataset(pFable, pPort, pIndex + 1, fDone);
		});
}
|
|
971
|
+
|
|
972
|
+
/**
 * Execute an array of callback-style step functions one after another,
 * starting at pIndex; invoke fDone once the end of the array is reached.
 * Each step receives a single continuation it must call exactly once.
 */
function runStepChain(pSteps, pIndex, fDone)
{
	if (pIndex < pSteps.length)
	{
		pSteps[pIndex](() => runStepChain(pSteps, pIndex + 1, fDone));
		return;
	}
	return fDone();
}
|
|
987
|
+
|
|
988
|
+
// ================================================================
|
|
989
|
+
// Ingest: HTTP Mode (POST to /facto/ingest/file)
|
|
990
|
+
// ================================================================
|
|
991
|
+
|
|
992
|
+
/**
 * Ingest a downloaded payload through the running server's
 * POST /facto/ingest/file endpoint, shaping the request body per format.
 *
 * csv/json/xml go through verbatim; tsv is sent as csv with a tab delimiter
 * and '#' comment lines removed; fixed-width carries its column spec; excel
 * is delegated to ingestAsExcelHTTP. maxLines (when present) trims the
 * content -- keeping one extra header line for everything except fixed-width.
 */
function ingestDataHTTP(pPort, pDatasetDef, pBuffer, pIDDataset, pIDSource, fCallback)
{
	const tmpFormat = pDatasetDef.format;
	const tmpContent = pBuffer.toString('utf8');

	// Excel takes a completely different path (BLS JSON -> xlsx -> base64).
	if (tmpFormat === 'excel')
	{
		return ingestAsExcelHTTP(pPort, pDatasetDef, tmpContent, pIDDataset, pIDSource, fCallback);
	}

	// Build the ingest request body
	const tmpBody =
	{
		IDDataset: pIDDataset,
		IDSource: pIDSource
	};

	if (tmpFormat === 'csv' || tmpFormat === 'json' || tmpFormat === 'xml')
	{
		// These three pass through unchanged with their own format tag.
		tmpBody.Format = tmpFormat;
		tmpBody.Content = tmpContent;
	}
	else if (tmpFormat === 'tsv')
	{
		// Strip comment lines before sending, send as CSV with tab delimiter
		tmpBody.Format = 'csv';
		tmpBody.Content = tmpContent.split('\n')
			.filter((pLine) => !pLine.startsWith('#'))
			.join('\n');
		tmpBody.Delimiter = '\t';
	}
	else if (tmpFormat === 'fixed-width')
	{
		tmpBody.Format = 'fixed-width';
		tmpBody.Content = tmpContent;
		if (pDatasetDef.parseOptions && pDatasetDef.parseOptions.columns)
		{
			tmpBody.Columns = pDatasetDef.parseOptions.columns;
		}
		// Apply maxLines limit by trimming content (no header allowance here)
		if (pDatasetDef.maxLines)
		{
			const tmpFixedLines = tmpBody.Content.split('\n');
			if (tmpFixedLines.length > pDatasetDef.maxLines)
			{
				tmpBody.Content = tmpFixedLines.slice(0, pDatasetDef.maxLines).join('\n');
			}
		}
	}
	else
	{
		return fCallback(new Error(`Unknown format: ${tmpFormat}`));
	}

	// Apply maxLines for the remaining formats, keeping one extra header line.
	// (excel already returned above, so only fixed-width is excluded here.)
	if (tmpFormat !== 'fixed-width' && pDatasetDef.maxLines)
	{
		const tmpAllLines = tmpBody.Content.split('\n');
		if (tmpAllLines.length > pDatasetDef.maxLines + 1)
		{
			tmpBody.Content = tmpAllLines.slice(0, pDatasetDef.maxLines + 1).join('\n');
		}
	}

	httpPost(pPort, '/facto/ingest/file', tmpBody,
		(pError, pResult) =>
		{
			if (pError)
			{
				return fCallback(pError);
			}
			if (pResult && pResult.Error)
			{
				return fCallback(new Error(pResult.Error));
			}
			// Normalize the server response into a stable result shape.
			return fCallback(null,
			{
				Ingested: (pResult && pResult.Ingested) || 0,
				Errors: (pResult && pResult.Errors) || 0,
				Total: (pResult && pResult.Total) || 0,
				DatasetVersion: pResult && pResult.DatasetVersion,
				IsDuplicate: pResult && pResult.IsDuplicate,
				ContentSignature: pResult && pResult.ContentSignature
			});
		});
}
|
|
1088
|
+
|
|
1089
|
+
/**
 * For the Excel format demo in HTTP mode: parse the BLS JSON response,
 * build an in-memory .xlsx workbook, base64-encode it, and POST to the
 * /facto/ingest/file endpoint.
 *
 * Calls back with (pError) on parse/transport failure, or
 * (null, { Ingested, Errors, Total, DatasetVersion, IsDuplicate, ContentSignature }).
 * Note: 'xlsx' is required lazily so the dependency is only loaded when the
 * excel demo path actually runs.
 */
function ingestAsExcelHTTP(pPort, pDatasetDef, pContent, pIDDataset, pIDSource, fCallback)
{
	try
	{
		let libXLSX = require('xlsx');
		let tmpJSON = JSON.parse(pContent);

		// BLS API returns { Results: { series: [ { data: [...] } ] } }
		// Flatten every series' data array into one record list.
		let tmpRecords = [];
		if (tmpJSON.Results && tmpJSON.Results.series)
		{
			for (let s = 0; s < tmpJSON.Results.series.length; s++)
			{
				let tmpSeries = tmpJSON.Results.series[s];
				if (tmpSeries.data && Array.isArray(tmpSeries.data))
				{
					tmpRecords = tmpRecords.concat(tmpSeries.data);
				}
			}
		}

		// Empty result is reported as a successful zero-count ingest, not an error.
		if (tmpRecords.length === 0)
		{
			console.log('   BLS: No CPI data records found in response.');
			return fCallback(null, { Ingested: 0, Errors: 0, Total: 0 });
		}

		console.log(`   BLS: Converting ${tmpRecords.length} records to Excel...`);

		// Write to buffer (no temp file -- the workbook stays in memory)
		let tmpWorksheet = libXLSX.utils.json_to_sheet(tmpRecords);
		let tmpWorkbook = libXLSX.utils.book_new();
		libXLSX.utils.book_append_sheet(tmpWorkbook, tmpWorksheet, 'CPI Data');
		let tmpExcelBuffer = libXLSX.write(tmpWorkbook, { type: 'buffer', bookType: 'xlsx' });

		console.log(`   BLS: Excel buffer ${tmpExcelBuffer.length} bytes, sending as base64...`);

		// base64 keeps the binary workbook safe inside the JSON request body.
		let tmpBody =
		{
			IDDataset: pIDDataset,
			IDSource: pIDSource,
			Format: 'excel',
			Content: tmpExcelBuffer.toString('base64')
		};

		httpPost(pPort, '/facto/ingest/file', tmpBody,
			(pError, pResult) =>
			{
				if (pError)
				{
					return fCallback(pError);
				}
				// The endpoint reports application-level failures in pResult.Error.
				if (pResult && pResult.Error)
				{
					return fCallback(new Error(pResult.Error));
				}
				// Normalize the server response into a stable result shape.
				return fCallback(null,
				{
					Ingested: (pResult && pResult.Ingested) || 0,
					Errors: (pResult && pResult.Errors) || 0,
					Total: (pResult && pResult.Total) || 0,
					DatasetVersion: pResult && pResult.DatasetVersion,
					IsDuplicate: pResult && pResult.IsDuplicate,
					ContentSignature: pResult && pResult.ContentSignature
				});
			});
	}
	catch (pError)
	{
		// JSON.parse or xlsx conversion failed -- surface to the caller.
		return fCallback(pError);
	}
}
|
|
1166
|
+
|
|
1167
|
+
// ================================================================
|
|
1168
|
+
// Ingest: Direct Mode (parse + DAL)
|
|
1169
|
+
// ================================================================
|
|
1170
|
+
|
|
1171
|
+
/**
 * Ingest a downloaded payload directly through the DAL (no HTTP server):
 * parse the content with the ingest engine's format-specific parser, then
 * insert one Record (plus one CertaintyIndex) per parsed row, sequentially.
 *
 * Calls back with (pError) on parse failure, or
 * (null, { Ingested, Errors, Total }) once every row has been attempted.
 * Individual row-create failures are counted in Errors, not surfaced.
 */
function ingestDataDirect(pFable, pDatasetDef, pBuffer, pIDDataset, pIDSource, fCallback)
{
	let tmpIngestEngine = pFable.RetoldFactoIngestEngine;
	let tmpFormat = pDatasetDef.format;
	let tmpCertainty = pDatasetDef.certainty;
	let tmpContent = pBuffer.toString('utf8');

	// Shared continuation for every parser branch below.
	let tmpParseCallback = (pParseError, pParsedRecords) =>
	{
		if (pParseError)
		{
			return fCallback(pParseError);
		}

		if (!pParsedRecords || pParsedRecords.length === 0)
		{
			return fCallback(null, { Ingested: 0, Errors: 0, Total: 0 });
		}

		// Limit records if specified
		if (pDatasetDef.maxLines && pParsedRecords.length > pDatasetDef.maxLines)
		{
			pParsedRecords = pParsedRecords.slice(0, pDatasetDef.maxLines);
		}

		console.log(`  Parsed: ${pParsedRecords.length} records`);

		// Ingest records sequentially
		let tmpIngested = 0;
		let tmpErrors = 0;

		// Self-recursive row inserter; recursion depth equals the record count
		// (NOTE(review): assumes doCreate invokes its callback asynchronously,
		// otherwise very large datasets could exhaust the stack -- confirm).
		let tmpIngestRecord = (pRecordIndex) =>
		{
			if (pRecordIndex >= pParsedRecords.length)
			{
				// All rows attempted -- report totals.
				return fCallback(null,
				{
					Ingested: tmpIngested,
					Errors: tmpErrors,
					Total: pParsedRecords.length
				});
			}

			let tmpRowData = pParsedRecords[pRecordIndex];
			// Non-string rows are serialized; strings are stored verbatim.
			let tmpRecordData = {
				IDDataset: pIDDataset,
				IDSource: pIDSource,
				Type: `gov-${tmpFormat}`,
				Version: 1,
				IngestDate: new Date().toISOString(),
				Content: (typeof tmpRowData === 'string') ? tmpRowData : JSON.stringify(tmpRowData)
			};

			let tmpQuery = pFable.DAL.Record.query.clone()
				.addRecord(tmpRecordData);

			pFable.DAL.Record.doCreate(tmpQuery,
				(pCreateError, pQuery, pQueryRead, pRecord) =>
				{
					if (pCreateError)
					{
						// Count the failure and keep going with the next row.
						tmpErrors++;
						return tmpIngestRecord(pRecordIndex + 1);
					}

					tmpIngested++;

					// Create certainty index
					let tmpCIQuery = pFable.DAL.CertaintyIndex.query.clone()
						.addRecord(
						{
							IDRecord: pRecord.IDRecord,
							CertaintyValue: tmpCertainty,
							Dimension: 'overall',
							Justification: `${pDatasetDef.agency} official data`
						});

					// CertaintyIndex creation is best-effort; its error is ignored.
					pFable.DAL.CertaintyIndex.doCreate(tmpCIQuery,
						() =>
						{
							return tmpIngestRecord(pRecordIndex + 1);
						});
				});
		};

		tmpIngestRecord(0);
	};

	// Route to appropriate parser
	switch (tmpFormat)
	{
		case 'csv':
			tmpIngestEngine.parseCSV(tmpContent, {}, tmpParseCallback);
			break;

		case 'json':
			tmpIngestEngine.parseJSON(tmpContent, tmpParseCallback);
			break;

		case 'xml':
			tmpIngestEngine.parseXML(tmpContent, pDatasetDef.parseOptions || {}, tmpParseCallback);
			break;

		case 'tsv':
			// NOTE(review): if parseOptions is supplied it REPLACES the tab-delimiter
			// defaults entirely rather than merging with them -- confirm intended.
			tmpIngestEngine.parseCSV(tmpContent, pDatasetDef.parseOptions || { delimiter: '\t', stripCommentLines: true }, tmpParseCallback);
			break;

		case 'excel':
			// Special: download JSON, convert to Excel, then parse back
			ingestAsExcelDirect(pFable, pDatasetDef, tmpContent, tmpParseCallback);
			break;

		case 'fixed-width':
			tmpIngestEngine.parseFixedWidth(tmpContent, pDatasetDef.parseOptions || {}, tmpParseCallback);
			break;

		default:
			return fCallback(new Error(`Unknown format: ${tmpFormat}`));
	}
}
|
|
1291
|
+
|
|
1292
|
+
/**
 * For the Excel format demo in direct mode: parse the BLS JSON response,
 * build an in-memory .xlsx workbook, and feed the workbook buffer straight
 * through the ingest engine's parseExcel.
 *
 * Calls back with (pError) on parse/convert failure, or
 * (null, pParsedRecords) mirroring the other direct-mode parsers.
 *
 * FIX: the previous implementation round-tripped through a fixed-name temp
 * file (__dirname/tmp-bls-cpi.xlsx), which races between concurrent runs,
 * fails on read-only package installs, and could leak the file on a crash.
 * The workbook is now written to an in-memory buffer, matching the HTTP
 * variant (ingestAsExcelHTTP), which already passes parseExcel a Buffer.
 */
function ingestAsExcelDirect(pFable, pDatasetDef, pContent, fCallback)
{
	try
	{
		let libXLSX = require('xlsx');
		let tmpJSON = JSON.parse(pContent);

		// BLS API returns { Results: { series: [ { data: [...] } ] } }
		// Flatten every series' data array into one record list.
		let tmpRecords = [];
		if (tmpJSON.Results && tmpJSON.Results.series)
		{
			for (let s = 0; s < tmpJSON.Results.series.length; s++)
			{
				let tmpSeries = tmpJSON.Results.series[s];
				if (tmpSeries.data && Array.isArray(tmpSeries.data))
				{
					tmpRecords = tmpRecords.concat(tmpSeries.data);
				}
			}
		}

		if (tmpRecords.length === 0)
		{
			console.log('   BLS: No CPI data records found in response.');
			return fCallback(null, []);
		}

		console.log(`   BLS: Converting ${tmpRecords.length} records to Excel...`);

		// Build the workbook entirely in memory.
		let tmpWorksheet = libXLSX.utils.json_to_sheet(tmpRecords);
		let tmpWorkbook = libXLSX.utils.book_new();
		libXLSX.utils.book_append_sheet(tmpWorkbook, tmpWorksheet, 'CPI Data');
		let tmpExcelBuffer = libXLSX.write(tmpWorkbook, { type: 'buffer', bookType: 'xlsx' });

		console.log(`   BLS: Excel buffer ${tmpExcelBuffer.length} bytes, parsing...`);

		// Parse the buffer back through parseExcel, like any uploaded workbook.
		pFable.RetoldFactoIngestEngine.parseExcel(tmpExcelBuffer, {},
			(pParseError, pParsedRecords) =>
			{
				return fCallback(pParseError, pParsedRecords);
			});
	}
	catch (pError)
	{
		// JSON.parse or xlsx conversion failed -- surface to the caller.
		return fCallback(pError);
	}
}
|
|
1351
|
+
|
|
1352
|
+
// ================================================================
|
|
1353
|
+
// Summary
|
|
1354
|
+
// ================================================================
|
|
1355
|
+
|
|
1356
|
+
/**
 * Print the end-of-run warehouse summary.
 *
 * HTTP mode (pPort truthy): fetches aggregate counts from the server's
 * /facto/projections/summary endpoint. Direct mode: counts each entity
 * table through the DAL, printing once all six counts have arrived.
 * Fire-and-forget: nothing is returned and no callback is taken.
 */
function printSummary(pFable, pPort)
{
	console.log('==========================================================');
	console.log('  WAREHOUSE SUMMARY');
	console.log('==========================================================');

	if (pPort)
	{
		// HTTP mode: use the ProjectionEngine summary endpoint
		httpGet(pPort, '/facto/projections/summary',
			(pError, pSummary) =>
			{
				if (pError)
				{
					// On fetch failure the closing banner/"Done!" lines are skipped.
					console.error(`  Error fetching summary: ${pError.message}`);
					return;
				}

				console.log(`  Sources: ${pSummary.Sources}`);
				console.log(`  Datasets: ${pSummary.Datasets}`);
				console.log(`  Records: ${pSummary.Records}`);
				console.log(`  Certainty Indices: ${pSummary.CertaintyIndices}`);
				console.log(`  Ingest Jobs: ${pSummary.IngestJobs}`);
				console.log('');
				if (pSummary.DatasetsByType)
				{
					console.log('  Datasets by Type:');
					for (let tmpType in pSummary.DatasetsByType)
					{
						console.log(`    ${tmpType}: ${pSummary.DatasetsByType[tmpType]}`);
					}
				}
				console.log('');
				console.log(`  Server: http://localhost:${pPort}/facto/app/`);
				console.log('==========================================================');
				console.log('  Done!');
				console.log('');
			});
	}
	else
	{
		// Direct mode: DAL counts
		let tmpCountEntities = ['Source', 'Dataset', 'DatasetSource', 'SourceDocumentation', 'Record', 'CertaintyIndex'];
		let tmpCounts = {};
		// Countdown latch: the summary prints inside whichever callback fires last.
		let tmpRemaining = tmpCountEntities.length;

		for (let i = 0; i < tmpCountEntities.length; i++)
		{
			let tmpEntity = tmpCountEntities[i];

			// Only count live rows (soft-delete flag Deleted = 0).
			let tmpQuery = pFable.DAL[tmpEntity].query.clone()
				.addFilter('Deleted', 0);

			pFable.DAL[tmpEntity].doCount(tmpQuery,
				(pError, pQuery, pCount) =>
				{
					// Coerce string counts from the driver; errors fall back to 0.
					tmpCounts[tmpEntity] = (typeof pCount === 'number') ? pCount : parseInt(pCount, 10) || 0;
					tmpRemaining--;

					if (tmpRemaining <= 0)
					{
						console.log(`  Sources: ${tmpCounts.Source}`);
						console.log(`  Datasets: ${tmpCounts.Dataset}`);
						console.log(`  Dataset-Source Links: ${tmpCounts.DatasetSource}`);
						console.log(`  Source Documentation: ${tmpCounts.SourceDocumentation}`);
						console.log(`  Records: ${tmpCounts.Record}`);
						console.log(`  Certainty Indices: ${tmpCounts.CertaintyIndex}`);
						console.log('');
						console.log(`  Database: ${_DBPath}`);
						console.log('==========================================================');
						console.log('  Done!');
						console.log('');
					}
				});
		}
	}
}
|