retold-facto 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/.claude/launch.json +11 -0
  2. package/.dockerignore +8 -0
  3. package/.quackage.json +19 -0
  4. package/Dockerfile +26 -0
  5. package/bin/retold-facto.js +909 -0
  6. package/examples/facto-government-data.sqlite +0 -0
  7. package/examples/government-data-catalog.json +137 -0
  8. package/examples/government-data-loader.js +1432 -0
  9. package/package.json +91 -0
  10. package/scripts/facto-download.js +425 -0
  11. package/source/Retold-Facto.js +1042 -0
  12. package/source/services/Retold-Facto-BeaconProvider.js +511 -0
  13. package/source/services/Retold-Facto-CatalogManager.js +1252 -0
  14. package/source/services/Retold-Facto-DataLakeService.js +1642 -0
  15. package/source/services/Retold-Facto-DatasetManager.js +417 -0
  16. package/source/services/Retold-Facto-IngestEngine.js +1315 -0
  17. package/source/services/Retold-Facto-ProjectionEngine.js +3960 -0
  18. package/source/services/Retold-Facto-RecordManager.js +360 -0
  19. package/source/services/Retold-Facto-SchemaManager.js +1110 -0
  20. package/source/services/Retold-Facto-SourceFolderScanner.js +2243 -0
  21. package/source/services/Retold-Facto-SourceManager.js +730 -0
  22. package/source/services/Retold-Facto-StoreConnectionManager.js +441 -0
  23. package/source/services/Retold-Facto-ThroughputMonitor.js +478 -0
  24. package/source/services/web-app/codemirror-entry.js +7 -0
  25. package/source/services/web-app/pict-app/Pict-Application-Facto-Configuration.json +9 -0
  26. package/source/services/web-app/pict-app/Pict-Application-Facto.js +70 -0
  27. package/source/services/web-app/pict-app/Pict-Facto-Bundle.js +11 -0
  28. package/source/services/web-app/pict-app/providers/Pict-Provider-Facto-UI.js +66 -0
  29. package/source/services/web-app/pict-app/providers/Pict-Provider-Facto.js +69 -0
  30. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Catalog.js +93 -0
  31. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Connections.js +42 -0
  32. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Datasets.js +605 -0
  33. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Projections.js +188 -0
  34. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Scanner.js +80 -0
  35. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Schema.js +116 -0
  36. package/source/services/web-app/pict-app/providers/facto-api/Facto-API-Sources.js +104 -0
  37. package/source/services/web-app/pict-app/views/PictView-Facto-Catalog.js +526 -0
  38. package/source/services/web-app/pict-app/views/PictView-Facto-Datasets.js +173 -0
  39. package/source/services/web-app/pict-app/views/PictView-Facto-Ingest.js +259 -0
  40. package/source/services/web-app/pict-app/views/PictView-Facto-Layout.js +191 -0
  41. package/source/services/web-app/pict-app/views/PictView-Facto-Projections.js +231 -0
  42. package/source/services/web-app/pict-app/views/PictView-Facto-Records.js +326 -0
  43. package/source/services/web-app/pict-app/views/PictView-Facto-Scanner.js +624 -0
  44. package/source/services/web-app/pict-app/views/PictView-Facto-Sources.js +201 -0
  45. package/source/services/web-app/pict-app/views/PictView-Facto-Throughput.js +456 -0
  46. package/source/services/web-app/pict-app-full/Pict-Application-Facto-Full-Configuration.json +14 -0
  47. package/source/services/web-app/pict-app-full/Pict-Application-Facto-Full.js +391 -0
  48. package/source/services/web-app/pict-app-full/providers/PictRouter-Facto-Configuration.json +56 -0
  49. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-BottomBar.js +68 -0
  50. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Connections.js +340 -0
  51. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Dashboard.js +149 -0
  52. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Dashboards.js +819 -0
  53. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Datasets.js +178 -0
  54. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-IngestJobs.js +99 -0
  55. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Layout.js +62 -0
  56. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-MappingEditor.js +158 -0
  57. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-ProjectionDetail.js +1120 -0
  58. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Projections.js +172 -0
  59. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-QueryPanel.js +119 -0
  60. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-RecordViewer.js +663 -0
  61. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Records.js +648 -0
  62. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Scanner.js +1017 -0
  63. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaDetail.js +1404 -0
  64. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaDocEditor.js +1036 -0
  65. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaEditor.js +636 -0
  66. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SchemaResearch.js +357 -0
  67. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceDetail.js +822 -0
  68. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceEditor.js +1036 -0
  69. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-SourceResearch.js +487 -0
  70. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Sources.js +165 -0
  71. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-Throughput.js +439 -0
  72. package/source/services/web-app/pict-app-full/views/PictView-Facto-Full-TopBar.js +335 -0
  73. package/source/services/web-app/pict-app-full/views/projections/Facto-Projections-Constants.js +71 -0
  74. package/source/services/web-app/web/chart.min.js +20 -0
  75. package/source/services/web-app/web/codemirror-bundle.js +30099 -0
  76. package/source/services/web-app/web/css/facto-themes.css +467 -0
  77. package/source/services/web-app/web/css/facto.css +502 -0
  78. package/source/services/web-app/web/index.html +28 -0
  79. package/source/services/web-app/web/retold-facto.js +12138 -0
  80. package/source/services/web-app/web/retold-facto.js.map +1 -0
  81. package/source/services/web-app/web/retold-facto.min.js +2 -0
  82. package/source/services/web-app/web/retold-facto.min.js.map +1 -0
  83. package/source/services/web-app/web/simple/index.html +17 -0
  84. package/test/Facto_Browser_Integration_tests.js +798 -0
  85. package/test/RetoldFacto_tests.js +4117 -0
  86. package/test/fixtures/weather-readings.csv +17 -0
  87. package/test/fixtures/weather-stations.csv +9 -0
  88. package/test/model/MeadowModel-Extended.json +8497 -0
  89. package/test/model/MeadowModel-PICT.json +1 -0
  90. package/test/model/MeadowModel.json +1355 -0
  91. package/test/model/ddl/Facto.ddl +225 -0
  92. package/test/model/fable-configuration.json +14 -0
@@ -0,0 +1,1432 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Retold Facto — Government Data Loader Example
4
+ *
5
+ * Downloads 6 real US government datasets spanning all supported
6
+ * ingest formats (CSV, JSON, XML, TSV, Excel, Fixed-Width), creates
7
+ * Source/Dataset/SourceDocumentation records, and ingests the data.
8
+ *
9
+ * Supports two modes:
10
+ * --port <port> HTTP mode: exercises the Facto REST API against a running server
11
+ * --db <path> Direct mode: creates a standalone SQLite database (default)
12
+ *
13
+ * Usage:
14
+ * node examples/government-data-loader.js --port 8386
15
+ * node examples/government-data-loader.js --db examples/facto-government-data.sqlite
16
+ *
17
+ * @author Steven Velozo <steven@velozo.com>
18
+ */
19
+ const libFs = require('fs');
20
+ const libPath = require('path');
21
+ const libHttp = require('http');
22
+ const libHttps = require('https');
23
+
24
+ // ================================================================
25
+ // Configuration
26
+ // ================================================================
27
+
28
// Runtime configuration, overridable from the command line.
let _DBPath = libPath.join(__dirname, 'facto-government-data.sqlite');
let _CLIPort = null;

// Parse CLI flags: --db/-d <path>, --port/-p <port>, --help/-h
let tmpArgs = process.argv.slice(2);
let tmpArgIndex = 0;
while (tmpArgIndex < tmpArgs.length)
{
	let tmpFlag = tmpArgs[tmpArgIndex];
	let tmpFlagValue = tmpArgs[tmpArgIndex + 1];

	if ((tmpFlag === '--db' || tmpFlag === '-d') && tmpFlagValue)
	{
		_DBPath = libPath.resolve(tmpFlagValue);
		// Consume the value token as well as the flag token
		tmpArgIndex++;
	}
	else if ((tmpFlag === '--port' || tmpFlag === '-p') && tmpFlagValue)
	{
		_CLIPort = parseInt(tmpFlagValue, 10);
		tmpArgIndex++;
	}
	else if (tmpFlag === '--help' || tmpFlag === '-h')
	{
		console.log('Retold Facto — Government Data Loader');
		console.log('');
		console.log('Usage:');
		console.log(' node examples/government-data-loader.js [options]');
		console.log('');
		console.log('Options:');
		console.log(' --port, -p <port> HTTP mode: send data to a running Facto server');
		console.log(' --db, -d <path> Direct mode: create a standalone SQLite database');
		console.log(' (default: examples/facto-government-data.sqlite)');
		console.log(' --help, -h Show this help message');
		console.log('');
		console.log('Examples:');
		console.log(' node examples/government-data-loader.js --port 8386');
		console.log(' node examples/government-data-loader.js --db /tmp/test.sqlite');
		process.exit(0);
	}
	tmpArgIndex++;
}
64
+
65
+ // ================================================================
66
+ // Dataset Definitions
67
+ // ================================================================
68
+
69
// Six real federal datasets, one per supported ingest format.  Each entry
// drives one Source/Dataset pair plus SourceDocumentation records:
//   name          - Dataset name (used for server-side dedupe in HTTP mode)
//   sourceType    - Source.Type ('API' or 'File')
//   sourceURL     - where the raw data is downloaded from
//   agency        - publishing agency; Sources are deduplicated per agency
//   format        - ingest format exercised (csv/json/xml/tsv/excel/fixed-width)
//   certainty     - confidence score recorded alongside the data
//   parseOptions  - optional format-specific parser settings
//   maxLines      - optional cap on ingested lines
//   documentation - SourceDocumentation records (Name/DocumentType/Description/Content)
const GOVERNMENT_DATASETS = [
	// CSV: USGS real-time earthquake feed
	{
		name: 'USGS Earthquakes (Past 7 Days)',
		sourceType: 'API',
		sourceURL: 'https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv',
		agency: 'US Geological Survey',
		description: 'All earthquakes worldwide in the past 7 days, from the USGS Earthquake Hazards Program real-time feed.',
		format: 'csv',
		certainty: 0.95,
		documentation: [
			{
				Name: 'Data Source Information',
				DocumentType: 'Overview',
				Description: 'USGS Earthquake Hazards Program provides real-time earthquake data via GeoJSON, CSV, and KML feeds.',
				Content: 'Feed URL: https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv\nUpdate Frequency: Every 5 minutes\nCoverage: Global\nFields: time, latitude, longitude, depth, mag, magType, nst, gap, dmin, rms, net, id, updated, place, type, horizontalError, depthError, magError, magNst, status, locationSource, magSource'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'USGS data is in the public domain.',
				Content: 'USGS-authored or produced data and information are in the public domain and may be used without restriction. https://www.usgs.gov/information-policies-and-instructions/copyrights-and-credits'
			}
		]
	},
	// JSON: Treasury Fiscal Data API
	{
		name: 'Treasury Debt to the Penny',
		sourceType: 'API',
		sourceURL: 'https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v2/accounting/od/debt_to_penny?page[size]=100&format=json',
		agency: 'US Treasury Department',
		description: 'Daily US national debt figures from the Treasury Fiscal Data API, showing total public debt outstanding.',
		format: 'json',
		certainty: 0.98,
		documentation: [
			{
				Name: 'API Documentation',
				DocumentType: 'Overview',
				Description: 'Treasury Fiscal Data API provides programmatic access to federal financial data.',
				Content: 'API Base: https://api.fiscaldata.treasury.gov/\nEndpoint: /services/api/fiscal_service/v2/accounting/od/debt_to_penny\nFormat: JSON (default), CSV, XML\nFields: record_date, debt_held_public_amt, intragov_hold_amt, tot_pub_debt_out_amt, src_line_nbr, record_fiscal_year, record_fiscal_quarter, record_calendar_year, record_calendar_quarter, record_calendar_month, record_calendar_day'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'US Government public domain data.',
				Content: 'Data published by the US Treasury is in the public domain and freely available for reuse.'
			}
		]
	},
	// XML: same Treasury API, XML response format
	{
		name: 'Treasury Exchange Rates',
		sourceType: 'API',
		sourceURL: 'https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v1/accounting/od/rates_of_exchange?page[size]=50&format=xml',
		agency: 'US Treasury Department',
		description: 'Foreign currency exchange rates published by the US Treasury, in XML format.',
		format: 'xml',
		certainty: 0.95,
		documentation: [
			{
				Name: 'API Documentation',
				DocumentType: 'Overview',
				Description: 'Treasury exchange rates used for federal government reporting of foreign currency transactions.',
				Content: 'API Base: https://api.fiscaldata.treasury.gov/\nEndpoint: /services/api/fiscal_service/v1/accounting/od/rates_of_exchange\nFormat: XML\nFields: record_date, country, currency, country_currency_desc, exchange_rate, effective_date, src_line_nbr, record_fiscal_year, record_fiscal_quarter, record_calendar_year, record_calendar_quarter, record_calendar_month, record_calendar_day'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'US Government public domain data.',
				Content: 'Exchange rates published by the Treasury are public domain data.'
			}
		]
	},
	// TSV: USGS NWIS RDB format (tab-delimited with # comment header lines)
	{
		name: 'USGS Streamflow - Potomac River',
		sourceType: 'API',
		sourceURL: 'https://waterservices.usgs.gov/nwis/iv/?format=rdb&sites=01646500&period=P1D&parameterCd=00060',
		agency: 'US Geological Survey',
		description: 'Real-time streamflow (discharge) data for the Potomac River at Little Falls, DC. Tab-delimited RDB format with comment headers.',
		format: 'tsv',
		certainty: 0.90,
		parseOptions: { delimiter: '\t', stripCommentLines: true },
		documentation: [
			{
				Name: 'Data Source Information',
				DocumentType: 'Overview',
				Description: 'USGS National Water Information System (NWIS) provides real-time water data from thousands of monitoring stations.',
				Content: 'Service URL: https://waterservices.usgs.gov/nwis/iv/\nFormat: RDB (tab-delimited with # comment lines)\nSite: 01646500 (Potomac River at Little Falls, Washington DC)\nParameter: 00060 (Discharge, cubic feet per second)\nPeriod: P1D (past 1 day)\nUpdate Frequency: Every 15 minutes'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'USGS data is in the public domain.',
				Content: 'All USGS data products are in the public domain.'
			}
		]
	},
	// Excel: BLS CPI series (fetched as JSON, converted to Excel downstream).
	// FIX: series CUUR0000SA0 is NOT seasonally adjusted (CUUR prefix); the
	// description previously contradicted the Content documentation below.
	{
		name: 'BLS Consumer Price Index',
		sourceType: 'API',
		sourceURL: 'https://api.bls.gov/publicAPI/v2/timeseries/data/CUUR0000SA0',
		agency: 'Bureau of Labor Statistics',
		description: 'Consumer Price Index for All Urban Consumers (CPI-U), not seasonally adjusted. Downloaded as JSON, converted to Excel for format demonstration.',
		format: 'excel',
		certainty: 0.97,
		documentation: [
			{
				Name: 'API Documentation',
				DocumentType: 'Overview',
				Description: 'BLS Public Data API provides access to economic time series data.',
				Content: 'API Base: https://api.bls.gov/publicAPI/v2/\nSeries: CUUR0000SA0 (CPI for All Urban Consumers, All items, US city average, Not seasonally adjusted)\nFormat: JSON\nFields: year, period, periodName, value, footnotes'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'BLS data is in the public domain.',
				Content: 'Bureau of Labor Statistics data are in the public domain and may be reproduced without permission.'
			}
		]
	},
	// Fixed-width: NOAA GHCN station inventory (column positions are 1-based)
	{
		name: 'NOAA Weather Station Inventory',
		sourceType: 'File',
		sourceURL: 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt',
		agency: 'National Oceanic and Atmospheric Administration',
		description: 'Global Historical Climatology Network (GHCN) daily weather station inventory. Fixed-width format containing station IDs, locations, and metadata.',
		format: 'fixed-width',
		certainty: 0.92,
		parseOptions:
		{
			columns: [
				{ name: 'ID', start: 1, width: 11 },
				{ name: 'Latitude', start: 13, width: 8 },
				{ name: 'Longitude', start: 22, width: 9 },
				{ name: 'Elevation', start: 32, width: 6 },
				{ name: 'State', start: 39, width: 2 },
				{ name: 'Name', start: 42, width: 30 },
				{ name: 'GSNFlag', start: 73, width: 3 },
				{ name: 'HCNFlag', start: 77, width: 3 },
				{ name: 'WMOId', start: 81, width: 5 }
			]
		},
		// Limit to first 500 lines to keep example manageable
		maxLines: 500,
		documentation: [
			{
				Name: 'Data Source Information',
				DocumentType: 'Overview',
				Description: 'GHCN-Daily is the official archive of daily weather observations from land-based stations worldwide.',
				Content: 'File: ghcnd-stations.txt\nFormat: Fixed-width text\nColumns: ID (11), Latitude (8), Longitude (9), Elevation (6), State (2), Name (30), GSN Flag (3), HCN/CRN Flag (3), WMO ID (5)\nCoverage: Global (~120,000+ stations)\nSource: NOAA National Centers for Environmental Information'
			},
			{
				Name: 'License',
				DocumentType: 'License',
				Description: 'NOAA data is in the public domain.',
				Content: 'NOAA data are generally in the public domain and may be used freely.'
			},
			{
				Name: 'Data Dictionary',
				DocumentType: 'DataDictionary',
				Description: 'Column definitions for the GHCN station inventory file.',
				Content: 'ID: 11-char station identifier (country code + network code + station number)\nLatitude: Decimal degrees, positive=North\nLongitude: Decimal degrees, positive=East\nElevation: Meters above sea level\nState: US state abbreviation (blank for non-US)\nName: Station name\nGSN Flag: GSN (GCOS Surface Network) indicator\nHCN Flag: HCN (US Historical Climatology Network) or CRN indicator\nWMO ID: World Meteorological Organization station number'
			}
		]
	}
];
232
+
233
+ // ================================================================
234
+ // HTTPS Download Helper
235
+ // ================================================================
236
+
237
/**
 * Download the content at an HTTPS URL into a Buffer, following 3xx redirects.
 *
 * Fixes over the original: redirect recursion is now bounded (a redirect loop
 * previously recursed forever), and the callback is guaranteed to fire at most
 * once (a timeout destroys the request, which also emits 'error' and would
 * otherwise invoke the callback a second time).
 *
 * @param {string} pURL - Absolute URL to fetch
 * @param {function} fCallback - Node-style callback (pError, pBuffer)
 * @param {number} [pRedirectCount] - Internal: redirects already followed (default 0)
 */
function downloadURL(pURL, fCallback, pRedirectCount)
{
	let tmpRedirectCount = (typeof(pRedirectCount) === 'number') ? pRedirectCount : 0;
	if (tmpRedirectCount > 10)
	{
		return fCallback(new Error(`Too many redirects for ${pURL}`));
	}

	console.log(` Downloading: ${pURL.substring(0, 80)}...`);

	// Callback-once guard: timeout + 'error' can both fire for one request.
	let tmpCallbackFired = false;
	const fCallbackOnce = (pError, pBuffer) =>
	{
		if (tmpCallbackFired)
		{
			return;
		}
		tmpCallbackFired = true;
		return fCallback(pError, pBuffer);
	};

	let tmpRequest = libHttps.get(pURL,
		(pResponse) =>
		{
			// Follow redirects; Location may be relative to the origin.
			if (pResponse.statusCode >= 300 && pResponse.statusCode < 400 && pResponse.headers.location)
			{
				let tmpRedirectURL = pResponse.headers.location;
				if (!tmpRedirectURL.startsWith('http'))
				{
					let tmpParsedURL = new URL(pURL);
					tmpRedirectURL = tmpParsedURL.origin + tmpRedirectURL;
				}
				console.log(` Redirecting to: ${tmpRedirectURL.substring(0, 80)}...`);
				// Drain the redirect response so its socket is released.
				pResponse.resume();
				return downloadURL(tmpRedirectURL, fCallbackOnce, tmpRedirectCount + 1);
			}

			if (pResponse.statusCode !== 200)
			{
				return fCallbackOnce(new Error(`HTTP ${pResponse.statusCode} for ${pURL}`));
			}

			// Accumulate the body as raw chunks; concatenated once at the end.
			let tmpChunks = [];
			pResponse.on('data', (pChunk) => tmpChunks.push(pChunk));
			pResponse.on('end', () =>
			{
				let tmpBuffer = Buffer.concat(tmpChunks);
				return fCallbackOnce(null, tmpBuffer);
			});
			pResponse.on('error', (pError) => fCallbackOnce(pError));
		});

	tmpRequest.on('error', (pError) => fCallbackOnce(pError));
	tmpRequest.setTimeout(30000, () =>
	{
		tmpRequest.destroy();
		return fCallbackOnce(new Error(`Timeout downloading ${pURL}`));
	});
}
279
+
280
+ // ================================================================
281
+ // HTTP API Helpers (for HTTP mode)
282
+ // ================================================================
283
+
284
/**
 * Issue a JSON request to a Facto server on localhost and parse the JSON
 * response body.
 *
 * Fix over the original: the callback now fires at most once.  On timeout the
 * request is destroyed, which also emits 'error' — previously both paths
 * invoked the callback.
 *
 * @param {string} pMethod - HTTP verb ('GET', 'POST', 'PUT', ...)
 * @param {number} pPort - Port on localhost to connect to
 * @param {string} pPath - Request path (e.g. '/1.0/Source')
 * @param {object|null} pBody - Optional request body, serialized as JSON
 * @param {function} fCallback - Node-style callback (pError, pParsedJSON)
 */
function httpRequest(pMethod, pPort, pPath, pBody, fCallback)
{
	let tmpBodyString = pBody ? JSON.stringify(pBody) : null;

	// Callback-once guard (see function doc).
	let tmpCallbackFired = false;
	const fCallbackOnce = (pError, pResult) =>
	{
		if (tmpCallbackFired)
		{
			return;
		}
		tmpCallbackFired = true;
		return fCallback(pError, pResult);
	};

	let tmpOptions =
	{
		hostname: 'localhost',
		port: pPort,
		path: pPath,
		method: pMethod,
		headers: { 'Content-Type': 'application/json' }
	};

	if (tmpBodyString)
	{
		tmpOptions.headers['Content-Length'] = Buffer.byteLength(tmpBodyString);
	}

	let tmpRequest = libHttp.request(tmpOptions,
		(pResponse) =>
		{
			let tmpChunks = [];
			pResponse.on('data', (pChunk) => tmpChunks.push(pChunk));
			pResponse.on('end', () =>
			{
				let tmpRaw = Buffer.concat(tmpChunks).toString('utf8');
				try
				{
					return fCallbackOnce(null, JSON.parse(tmpRaw));
				}
				catch (pParseError)
				{
					// Surface a truncated sample of the unparseable payload.
					return fCallbackOnce(new Error(`Failed to parse response from ${pPath}: ${tmpRaw.substring(0, 200)}`));
				}
			});
			pResponse.on('error', (pError) => fCallbackOnce(pError));
		});

	tmpRequest.on('error', (pError) => fCallbackOnce(pError));
	tmpRequest.setTimeout(60000, () =>
	{
		tmpRequest.destroy();
		return fCallbackOnce(new Error(`Timeout on ${pMethod} ${pPath}`));
	});

	if (tmpBodyString)
	{
		tmpRequest.write(tmpBodyString);
	}
	tmpRequest.end();
}
335
+
336
/**
 * Convenience wrapper: POST a JSON body to the local Facto server.
 */
function httpPost(pPort, pPath, pBody, fCallback)
{
	httpRequest('POST', pPort, pPath, pBody, fCallback);
}
340
+
341
/**
 * Convenience wrapper: PUT a JSON body to the local Facto server.
 */
function httpPut(pPort, pPath, pBody, fCallback)
{
	httpRequest('PUT', pPort, pPath, pBody, fCallback);
}
345
+
346
/**
 * Convenience wrapper: GET from the local Facto server (no request body).
 */
function httpGet(pPort, pPath, fCallback)
{
	httpRequest('GET', pPort, pPath, null, fCallback);
}
350
+
351
// Sources are deduplicated per agency: maps agency name -> created IDSource.
let _SourcesByAgency = {};

// ================================================================
// Bootstrap
// ================================================================

const tmpBannerRule = '==========================================================';
console.log(tmpBannerRule);
console.log(' Retold Facto — Government Data Loader');
console.log(tmpBannerRule);

if (!_CLIPort)
{
	// Direct mode (default): build a standalone SQLite database first.
	console.log(` Mode: Direct (SQLite)`);
	console.log(` Database: ${_DBPath}`);
	console.log('');

	bootstrapDirect();
}
else
{
	// HTTP mode: a Facto server is already running, start loading immediately.
	console.log(` Mode: HTTP (port ${_CLIPort})`);
	console.log('');

	processAllDatasets(null, _CLIPort);
}
378
+
379
+ // ================================================================
380
+ // Direct-mode Bootstrap (SQLite + Fable)
381
+ // ================================================================
382
+
383
/**
 * Direct-mode bootstrap: stand up a Fable application with a SQLite Meadow
 * provider, create the Facto schema in a fresh database file, initialize the
 * RetoldFacto service (with the Orator web server disabled), then start
 * processing the dataset definitions.
 *
 * Exits the process with code 1 if the SQLite connection or the Facto
 * service initialization fails.
 */
function bootstrapDirect()
{
	// Project dependencies are required lazily so HTTP mode never loads them.
	const libFable = require('fable');
	const libMeadowConnectionSQLite = require('meadow-connection-sqlite');
	const libRetoldFacto = require('../source/Retold-Facto.js');

	// Remove existing database so we start fresh
	if (libFs.existsSync(_DBPath))
	{
		libFs.unlinkSync(_DBPath);
	}

	// Ensure parent directory exists
	let tmpDbDir = libPath.dirname(_DBPath);
	if (!libFs.existsSync(tmpDbDir))
	{
		libFs.mkdirSync(tmpDbDir, { recursive: true });
	}

	// Fable settings for this run.  NOTE(review): APIServerPort is 0 and
	// AutoStartOrator is false below, so no HTTP listener should start;
	// only warnings and above are logged to the console.
	let _Settings = (
	{
		Product: 'FactoGovLoader',
		ProductVersion: '0.0.1',
		APIServerPort: 0,
		LogStreams:
		[
			{
				streamtype: 'console',
				level: 'warn'
			}
		],
		SQLite:
		{
			SQLiteFilePath: _DBPath
		}
	});

	let _Fable = new libFable(_Settings);

	// Register and instantiate the SQLite Meadow provider before connecting.
	_Fable.serviceManager.addServiceType('MeadowSQLiteProvider', libMeadowConnectionSQLite);
	_Fable.serviceManager.instantiateServiceProvider('MeadowSQLiteProvider');

	_Fable.MeadowSQLiteProvider.connectAsync(
		(pConnectError) =>
		{
			if (pConnectError)
			{
				console.error(`SQLite connection error: ${pConnectError}`);
				process.exit(1);
			}

			// Tell downstream Meadow consumers which storage provider to use.
			_Fable.settings.MeadowProvider = 'SQLite';

			// Create schema
			_Fable.MeadowSQLiteProvider.db.exec(libRetoldFacto.FACTO_SCHEMA_SQL);
			console.log(' Schema created.');

			// Set up the Facto service (no web server needed)
			_Fable.serviceManager.addServiceType('RetoldFacto', libRetoldFacto);
			_Fable.serviceManager.instantiateServiceProvider('RetoldFacto',
				{
					StorageProvider: 'SQLite',
					AutoStartOrator: false,

					// Meadow model shipped with the package's test fixtures.
					FullMeadowSchemaPath: libPath.join(__dirname, '..', 'test', 'model') + '/',
					FullMeadowSchemaFilename: 'MeadowModel-Extended.json',

					// Only the pieces this loader needs; everything web-facing
					// stays off since there is no server in direct mode.
					Endpoints:
					{
						MeadowEndpoints: true,
						SourceManager: false,
						RecordManager: false,
						DatasetManager: false,
						IngestEngine: true,
						ProjectionEngine: false,
						WebUI: false
					}
				});

			_Fable.RetoldFacto.initializeService(
				(pInitError) =>
				{
					if (pInitError)
					{
						console.error(`Initialization error: ${pInitError}`);
						process.exit(1);
					}

					console.log(' Facto service initialized.');
					console.log('');

					// Direct mode passes the Fable instance and a null port.
					processAllDatasets(_Fable, null);
				});
		});
}
478
+
479
+ // ================================================================
480
+ // Agency-to-Source Mapping
481
+ // ================================================================
482
+
483
/**
 * Find or create a Source for a given agency.
 * In HTTP mode, queries the server first; in direct mode, checks our local map.
 *
 * Sources are deduplicated per agency for the lifetime of one run via the
 * module-level _SourcesByAgency map (agency name -> IDSource).
 *
 * @param {object|null} pFable - Fable instance (direct mode only; null in HTTP mode)
 * @param {number|null} pPort - Server port (HTTP mode only; null in direct mode)
 * @param {string} pAgencyName - Agency the Source represents (also its Name)
 * @param {string} pSourceType - Source.Type value ('API' or 'File')
 * @param {function} fCallback - Node-style callback (pError, pSourceID)
 */
function findOrCreateSource(pFable, pPort, pAgencyName, pSourceType, fCallback)
{
	let tmpIsHTTP = (pPort !== null);

	// Already created this agency's Source in this run?
	if (_SourcesByAgency[pAgencyName])
	{
		console.log(` Source reused: #${_SourcesByAgency[pAgencyName]} (${pAgencyName})`);
		return fCallback(null, _SourcesByAgency[pAgencyName]);
	}

	// Record template shared by both creation paths.
	let tmpSourceData =
	{
		Name: pAgencyName,
		Type: pSourceType,
		Protocol: 'HTTPS',
		Description: `Data source: ${pAgencyName}`,
		Active: 1
	};

	if (tmpIsHTTP)
	{
		// Check if source already exists on the server by name
		// Meadow FilteredTo: /1.0/{PluralEntity}/FilteredTo/FBV~Field~EQ~Value/Begin/Cap
		httpGet(pPort, `/1.0/Sources/FilteredTo/FBV~Name~EQ~${encodeURIComponent(pAgencyName)}/0/10`,
			(pError, pResult) =>
			{
				// NOTE(review): a lookup error is deliberately ignored here
				// (best-effort dedupe); a failed GET falls through to create.
				// Meadow doReads returns an array directly
				let tmpRecords = Array.isArray(pResult) ? pResult : [];
				if (tmpRecords.length > 0)
				{
					let tmpExistingID = tmpRecords[0].IDSource;
					_SourcesByAgency[pAgencyName] = tmpExistingID;
					console.log(` Source found on server: #${tmpExistingID} (${pAgencyName})`);
					return fCallback(null, tmpExistingID);
				}

				// Create new source
				httpPost(pPort, '/1.0/Source', tmpSourceData,
					(pCreateError, pCreateResult) =>
					{
						if (pCreateError)
						{
							console.error(` Error creating source: ${pCreateError.message}`);
							return fCallback(pCreateError);
						}
						let tmpNewID = pCreateResult.IDSource;
						_SourcesByAgency[pAgencyName] = tmpNewID;
						console.log(` Source created: #${tmpNewID} (${pAgencyName})`);

						// Activate it — best-effort: an activation failure is
						// not fatal and still yields the new Source ID.
						httpPut(pPort, `/facto/source/${tmpNewID}/activate`, {},
							(pActivateError) =>
							{
								if (!pActivateError)
								{
									console.log(` Source #${tmpNewID} activated`);
								}
								return fCallback(null, tmpNewID);
							});
					});
			});
	}
	else
	{
		// Direct mode: insert straight through the Meadow DAL.  No server
		// lookup is needed — the in-memory map above is the only dedupe.
		let tmpQuery = pFable.DAL.Source.query.clone()
			.addRecord(tmpSourceData);

		pFable.DAL.Source.doCreate(tmpQuery,
			(pError, pQuery, pQueryRead, pRecord) =>
			{
				if (pError)
				{
					console.error(` Error creating source: ${pError}`);
					return fCallback(pError);
				}
				let tmpNewID = pRecord.IDSource;
				_SourcesByAgency[pAgencyName] = tmpNewID;
				console.log(` Source created: #${tmpNewID} (${pAgencyName})`);
				return fCallback(null, tmpNewID);
			});
	}
}
570
+
571
/**
 * Find or create a Dataset by name.
 * In HTTP mode, queries the server first; in direct mode, always creates.
 *
 * @param {object|null} pFable - Fable instance (direct mode only)
 * @param {number|null} pPort - Server port (HTTP mode only)
 * @param {object} pDatasetDef - Entry from GOVERNMENT_DATASETS
 * @param {function} fCallback - Node-style callback (pError, pDatasetID)
 */
function findOrCreateDataset(pFable, pPort, pDatasetDef, fCallback)
{
	let tmpDatasetRecord =
	{
		Name: pDatasetDef.name,
		Type: 'Raw',
		Description: pDatasetDef.description
	};

	if (pPort === null)
	{
		// Direct mode: no lookup; insert straight through the Meadow DAL.
		let tmpCreateQuery = pFable.DAL.Dataset.query.clone()
			.addRecord(tmpDatasetRecord);

		pFable.DAL.Dataset.doCreate(tmpCreateQuery,
			(pError, pQuery, pQueryRead, pRecord) =>
			{
				if (pError)
				{
					console.error(` Error creating dataset: ${pError}`);
					return fCallback(pError);
				}
				console.log(` Dataset created: #${pRecord.IDDataset}`);
				return fCallback(null, pRecord.IDDataset);
			});
		return;
	}

	// HTTP mode: check if dataset already exists on the server by name.
	// Meadow FilteredTo: /1.0/{PluralEntity}/FilteredTo/FBV~Field~EQ~Value/Begin/Cap
	httpGet(pPort, `/1.0/Datasets/FilteredTo/FBV~Name~EQ~${encodeURIComponent(pDatasetDef.name)}/0/10`,
		(pError, pResult) =>
		{
			let tmpRecords = Array.isArray(pResult) ? pResult : [];
			if (tmpRecords.length > 0)
			{
				let tmpExistingID = tmpRecords[0].IDDataset;
				console.log(` Dataset found on server: #${tmpExistingID} (${pDatasetDef.name})`);
				return fCallback(null, tmpExistingID);
			}

			httpPost(pPort, '/1.0/Dataset', tmpDatasetRecord,
				(pCreateError, pCreateResult) =>
				{
					if (pCreateError)
					{
						console.error(` Error creating dataset: ${pCreateError.message}`);
						return fCallback(pCreateError);
					}
					let tmpNewID = pCreateResult.IDDataset;
					console.log(` Dataset created: #${tmpNewID}`);
					return fCallback(null, tmpNewID);
				});
		});
}
632
+
633
+ // ================================================================
634
+ // Dataset Processing Entry Point
635
+ // ================================================================
636
+
637
/**
 * Entry point for a load run: processes every GOVERNMENT_DATASETS entry in
 * sequence, then prints a summary (whether or not an error occurred).
 */
function processAllDatasets(pFable, pPort)
{
	// Fresh run: forget any Source IDs cached from a previous invocation.
	_SourcesByAgency = {};

	const fRunComplete = (pError) =>
	{
		if (pError)
		{
			console.error(`\nError during processing: ${pError.message}`);
		}
		printSummary(pFable, pPort);
	};

	processDataset(pFable, pPort, 0, fRunComplete);
}
653
+
654
+ // ================================================================
655
+ // Sequential Dataset Processing
656
+ // ================================================================
657
+
658
/**
 * Process one entry of GOVERNMENT_DATASETS (at pIndex), then recurse to the
 * next entry.  The per-dataset work is a fixed chain of seven steps executed
 * sequentially by runStepChain; each step logs failures and continues, so a
 * single bad dataset never aborts the whole run.
 *
 * Works in two modes: HTTP (pPort is a port number; steps call the running
 * service) and direct (pPort is null; steps use pFable.DAL).
 *
 * @param {object} pFable - Fable instance (DAL access in direct mode)
 * @param {number|null} pPort - HTTP port of a running service, or null for direct mode
 * @param {number} pIndex - index into GOVERNMENT_DATASETS to process
 * @param {function} fDone - invoked once all datasets from pIndex onward are done
 */
function processDataset(pFable, pPort, pIndex, fDone)
{
	// Base case: the whole dataset list has been processed.
	if (pIndex >= GOVERNMENT_DATASETS.length)
	{
		return fDone();
	}

	let tmpDatasetDef = GOVERNMENT_DATASETS[pIndex];
	let tmpIsHTTP = (pPort !== null);

	console.log(`----------------------------------------------------------`);
	console.log(` [${pIndex + 1}/${GOVERNMENT_DATASETS.length}] ${tmpDatasetDef.name}`);
	console.log(` Format: ${tmpDatasetDef.format.toUpperCase()} | Agency: ${tmpDatasetDef.agency}`);
	console.log(` Certainty: ${tmpDatasetDef.certainty}`);
	console.log('');

	// IDs resolved by steps 1 and 2; 0 means "not available" and makes
	// the dependent steps below skip their work.
	let tmpSourceID = 0;
	let tmpDatasetID = 0;

	// Step chain — each step calls the next
	let tmpSteps = [];

	// Step 1: Find or Create Source (deduped by agency name)
	tmpSteps.push(
		(fStep) =>
		{
			findOrCreateSource(pFable, pPort, tmpDatasetDef.agency, tmpDatasetDef.sourceType,
				(pError, pSourceID) =>
				{
					// Errors are tolerated: tmpSourceID stays 0 and later
					// steps that need it become no-ops.
					if (!pError && pSourceID)
					{
						tmpSourceID = pSourceID;
					}
					return fStep();
				});
		});

	// Step 2: Find or Create Dataset (deduped by name in HTTP mode)
	tmpSteps.push(
		(fStep) =>
		{
			findOrCreateDataset(pFable, pPort, tmpDatasetDef,
				(pError, pDatasetID) =>
				{
					if (!pError && pDatasetID)
					{
						tmpDatasetID = pDatasetID;
					}
					return fStep();
				});
		});

	// Step 3: Set VersionPolicy on Dataset
	tmpSteps.push(
		(fStep) =>
		{
			if (!tmpDatasetID)
			{
				return fStep();
			}

			if (tmpIsHTTP)
			{
				httpPut(pPort, `/facto/dataset/${tmpDatasetID}/version-policy`,
					{ VersionPolicy: 'Append' },
					(pError) =>
					{
						if (!pError)
						{
							console.log(` VersionPolicy set: Append`);
						}
						return fStep();
					});
			}
			else
			{
				// Direct mode: update the Dataset record
				let tmpQuery = pFable.DAL.Dataset.query.clone()
					.addRecord({ IDDataset: tmpDatasetID, VersionPolicy: 'Append' });

				pFable.DAL.Dataset.doUpdate(tmpQuery,
					(pError) =>
					{
						if (!pError)
						{
							console.log(` VersionPolicy set: Append`);
						}
						return fStep();
					});
			}
		});

	// Step 4: Link Dataset to Source
	tmpSteps.push(
		(fStep) =>
		{
			// Needs both IDs; skip quietly if either lookup failed.
			if (!tmpSourceID || !tmpDatasetID)
			{
				return fStep();
			}

			if (tmpIsHTTP)
			{
				httpPost(pPort, `/facto/dataset/${tmpDatasetID}/source`,
					{
						IDSource: tmpSourceID,
						ReliabilityWeight: tmpDatasetDef.certainty
					},
					(pError, pResult) =>
					{
						if (pError)
						{
							console.error(` Error linking source to dataset: ${pError.message}`);
						}
						else
						{
							console.log(` DatasetSource linked (weight: ${tmpDatasetDef.certainty})`);
						}
						return fStep();
					});
			}
			else
			{
				let tmpQuery = pFable.DAL.DatasetSource.query.clone()
					.addRecord(
						{
							IDDataset: tmpDatasetID,
							IDSource: tmpSourceID,
							ReliabilityWeight: tmpDatasetDef.certainty
						});

				pFable.DAL.DatasetSource.doCreate(tmpQuery,
					(pError) =>
					{
						if (!pError)
						{
							console.log(` DatasetSource linked (weight: ${tmpDatasetDef.certainty})`);
						}
						return fStep();
					});
			}
		});

	// Step 5: Create SourceDocumentation
	tmpSteps.push(
		(fStep) =>
		{
			if (!tmpSourceID || !tmpDatasetDef.documentation)
			{
				return fStep();
			}

			// All documentation creates are fired in parallel; fStep() only
			// runs once the completion counter reaches the expected total.
			let tmpDocsRemaining = tmpDatasetDef.documentation.length;
			let tmpDocsCreated = 0;

			for (let d = 0; d < tmpDatasetDef.documentation.length; d++)
			{
				let tmpDoc = tmpDatasetDef.documentation[d];
				let tmpDocData =
				{
					IDSource: tmpSourceID,
					Name: tmpDoc.Name,
					DocumentType: tmpDoc.DocumentType,
					MimeType: 'text/plain',
					Description: tmpDoc.Description,
					Content: tmpDoc.Content
				};

				// Shared completion handler -- counts successes and failures
				// alike (individual create errors are intentionally ignored).
				let tmpDocDone = () =>
				{
					tmpDocsCreated++;
					if (tmpDocsCreated >= tmpDocsRemaining)
					{
						console.log(` Documentation created: ${tmpDocsRemaining} doc(s)`);
						return fStep();
					}
				};

				if (tmpIsHTTP)
				{
					httpPost(pPort, '/1.0/SourceDocumentation', tmpDocData,
						(pError) =>
						{
							return tmpDocDone();
						});
				}
				else
				{
					let tmpQuery = pFable.DAL.SourceDocumentation.query.clone()
						.addRecord(tmpDocData);

					pFable.DAL.SourceDocumentation.doCreate(tmpQuery,
						(pError) =>
						{
							return tmpDocDone();
						});
				}
			}
		});

	// Step 6: Download and Ingest
	tmpSteps.push(
		(fStep) =>
		{
			if (!tmpSourceID || !tmpDatasetID)
			{
				console.error(' Skipping ingest: no source/dataset ID.');
				return fStep();
			}

			downloadURL(tmpDatasetDef.sourceURL,
				(pDownloadError, pBuffer) =>
				{
					if (pDownloadError)
					{
						console.error(` Download error: ${pDownloadError.message}`);
						return fStep();
					}

					console.log(` Downloaded: ${pBuffer.length} bytes`);

					if (tmpIsHTTP)
					{
						ingestDataHTTP(pPort, tmpDatasetDef, pBuffer, tmpDatasetID, tmpSourceID,
							(pIngestError, pResult) =>
							{
								if (pIngestError)
								{
									console.error(` Ingest error: ${pIngestError.message}`);
								}
								else if (pResult)
								{
									console.log(` Ingested: ${pResult.Ingested} records (${pResult.Errors} errors)`);
									// Version metadata is only reported by the HTTP ingest path.
									if (pResult.DatasetVersion)
									{
										console.log(` Version: ${pResult.DatasetVersion} | Duplicate: ${pResult.IsDuplicate ? 'Yes' : 'No'}`);
									}
								}
								return fStep();
							});
					}
					else
					{
						ingestDataDirect(pFable, tmpDatasetDef, pBuffer, tmpDatasetID, tmpSourceID,
							(pIngestError, pResult) =>
							{
								if (pIngestError)
								{
									console.error(` Ingest error: ${pIngestError.message}`);
								}
								else if (pResult)
								{
									console.log(` Ingested: ${pResult.Ingested} records (${pResult.Errors} errors)`);
								}
								return fStep();
							});
					}
				});
		});

	// Step 7: Print per-dataset stats
	tmpSteps.push(
		(fStep) =>
		{
			if (!tmpSourceID || !tmpDatasetID)
			{
				return fStep();
			}

			if (tmpIsHTTP)
			{
				httpGet(pPort, `/facto/source/${tmpSourceID}/summary`,
					(pError, pSourceSummary) =>
					{
						if (!pError && pSourceSummary)
						{
							console.log(` Source summary: ${pSourceSummary.RecordCount} records, ${pSourceSummary.DocumentationCount} docs`);
						}
						httpGet(pPort, `/facto/dataset/${tmpDatasetID}/stats`,
							(pError, pDatasetStats) =>
							{
								if (!pError && pDatasetStats)
								{
									console.log(` Dataset stats: ${pDatasetStats.RecordCount} records, ${pDatasetStats.SourceCount} source(s)`);
								}
								return fStep();
							});
					});
			}
			else
			{
				// Direct mode: quick count for feedback
				let tmpQuery = pFable.DAL.Record.query.clone()
					.addFilter('IDDataset', tmpDatasetID)
					.addFilter('Deleted', 0);
				pFable.DAL.Record.doCount(tmpQuery,
					(pError, pQuery, pCount) =>
					{
						// doCount may hand back a number or a numeric string
						// depending on the store -- normalize to a number.
						let tmpRecordCount = (typeof pCount === 'number') ? pCount : parseInt(pCount, 10) || 0;
						console.log(` Dataset #${tmpDatasetID}: ${tmpRecordCount} records stored`);
						return fStep();
					});
			}
		});

	// Run steps sequentially
	runStepChain(tmpSteps, 0,
		() =>
		{
			console.log('');
			processDataset(pFable, pPort, pIndex + 1, fDone);
		});
}
971
+
972
+ /**
973
+ * Run an array of sequential step functions.
974
+ */
975
+ function runStepChain(pSteps, pIndex, fDone)
976
+ {
977
+ if (pIndex >= pSteps.length)
978
+ {
979
+ return fDone();
980
+ }
981
+ pSteps[pIndex](
982
+ () =>
983
+ {
984
+ runStepChain(pSteps, pIndex + 1, fDone);
985
+ });
986
+ }
987
+
988
+ // ================================================================
989
+ // Ingest: HTTP Mode (POST to /facto/ingest/file)
990
+ // ================================================================
991
+
992
/**
 * Ingest a downloaded dataset through the HTTP /facto/ingest/file endpoint.
 * Builds a format-specific request body from the raw buffer and POSTs it;
 * the 'excel' format is delegated to ingestAsExcelHTTP.
 *
 * Fix: the maxLines limit is only applied to line-oriented formats
 * (csv/tsv/fixed-width).  Previously json and xml content was also sliced
 * by line, which corrupted the payload -- a line-truncated JSON or XML
 * document is no longer parseable by the server.
 *
 * @param {number} pPort - port of the running facto service
 * @param {object} pDatasetDef - dataset definition (format, maxLines, parseOptions)
 * @param {Buffer} pBuffer - raw downloaded file content
 * @param {number} pIDDataset - dataset to ingest into
 * @param {number} pIDSource - source the content came from
 * @param {function} fCallback - (pError, { Ingested, Errors, Total, DatasetVersion, IsDuplicate, ContentSignature })
 */
function ingestDataHTTP(pPort, pDatasetDef, pBuffer, pIDDataset, pIDSource, fCallback)
{
	let tmpFormat = pDatasetDef.format;
	let tmpContent = pBuffer.toString('utf8');

	// Build the ingest request body
	let tmpBody =
	{
		IDDataset: pIDDataset,
		IDSource: pIDSource
	};

	switch (tmpFormat)
	{
		case 'csv':
			tmpBody.Format = 'csv';
			tmpBody.Content = tmpContent;
			break;

		case 'json':
			tmpBody.Format = 'json';
			tmpBody.Content = tmpContent;
			break;

		case 'xml':
			tmpBody.Format = 'xml';
			tmpBody.Content = tmpContent;
			break;

		case 'tsv':
			// Strip comment lines before sending, send as CSV with tab delimiter
			tmpBody.Format = 'csv';
			tmpBody.Content = tmpContent.split('\n')
				.filter((pLine) => !pLine.startsWith('#'))
				.join('\n');
			tmpBody.Delimiter = '\t';
			break;

		case 'excel':
			// Special: convert BLS JSON → Excel → base64 for ingest
			return ingestAsExcelHTTP(pPort, pDatasetDef, tmpContent, pIDDataset, pIDSource, fCallback);

		case 'fixed-width':
			tmpBody.Format = 'fixed-width';
			tmpBody.Content = tmpContent;
			if (pDatasetDef.parseOptions && pDatasetDef.parseOptions.columns)
			{
				tmpBody.Columns = pDatasetDef.parseOptions.columns;
			}
			// Apply maxLines limit by trimming content (no header row here)
			if (pDatasetDef.maxLines)
			{
				let tmpLines = tmpBody.Content.split('\n');
				if (tmpLines.length > pDatasetDef.maxLines)
				{
					tmpBody.Content = tmpLines.slice(0, pDatasetDef.maxLines).join('\n');
				}
			}
			break;

		default:
			return fCallback(new Error(`Unknown format: ${tmpFormat}`));
	}

	// Apply maxLines only to the remaining line-oriented formats.  Trimming
	// by line would corrupt structured documents, so json/xml are sent whole.
	if ((tmpFormat === 'csv' || tmpFormat === 'tsv') && pDatasetDef.maxLines)
	{
		let tmpLines = tmpBody.Content.split('\n');
		if (tmpLines.length > pDatasetDef.maxLines + 1) // +1 for the header row
		{
			tmpBody.Content = tmpLines.slice(0, pDatasetDef.maxLines + 1).join('\n');
		}
	}

	httpPost(pPort, '/facto/ingest/file', tmpBody,
		(pError, pResult) =>
		{
			if (pError)
			{
				return fCallback(pError);
			}
			// The service reports application-level failures in-band.
			if (pResult && pResult.Error)
			{
				return fCallback(new Error(pResult.Error));
			}
			return fCallback(null,
				{
					Ingested: (pResult && pResult.Ingested) || 0,
					Errors: (pResult && pResult.Errors) || 0,
					Total: (pResult && pResult.Total) || 0,
					DatasetVersion: pResult && pResult.DatasetVersion,
					IsDuplicate: pResult && pResult.IsDuplicate,
					ContentSignature: pResult && pResult.ContentSignature
				});
		});
}
1088
+
1089
+ /**
1090
+ * For the Excel format demo in HTTP mode: parse the BLS JSON response,
1091
+ * write to a temporary .xlsx file, base64-encode it, and POST to the
1092
+ * /facto/ingest/file endpoint.
1093
+ */
1094
+ function ingestAsExcelHTTP(pPort, pDatasetDef, pContent, pIDDataset, pIDSource, fCallback)
1095
+ {
1096
+ try
1097
+ {
1098
+ let libXLSX = require('xlsx');
1099
+ let tmpJSON = JSON.parse(pContent);
1100
+
1101
+ // BLS API returns { Results: { series: [ { data: [...] } ] } }
1102
+ let tmpRecords = [];
1103
+ if (tmpJSON.Results && tmpJSON.Results.series)
1104
+ {
1105
+ for (let s = 0; s < tmpJSON.Results.series.length; s++)
1106
+ {
1107
+ let tmpSeries = tmpJSON.Results.series[s];
1108
+ if (tmpSeries.data && Array.isArray(tmpSeries.data))
1109
+ {
1110
+ tmpRecords = tmpRecords.concat(tmpSeries.data);
1111
+ }
1112
+ }
1113
+ }
1114
+
1115
+ if (tmpRecords.length === 0)
1116
+ {
1117
+ console.log(' BLS: No CPI data records found in response.');
1118
+ return fCallback(null, { Ingested: 0, Errors: 0, Total: 0 });
1119
+ }
1120
+
1121
+ console.log(` BLS: Converting ${tmpRecords.length} records to Excel...`);
1122
+
1123
+ // Write to buffer
1124
+ let tmpWorksheet = libXLSX.utils.json_to_sheet(tmpRecords);
1125
+ let tmpWorkbook = libXLSX.utils.book_new();
1126
+ libXLSX.utils.book_append_sheet(tmpWorkbook, tmpWorksheet, 'CPI Data');
1127
+ let tmpExcelBuffer = libXLSX.write(tmpWorkbook, { type: 'buffer', bookType: 'xlsx' });
1128
+
1129
+ console.log(` BLS: Excel buffer ${tmpExcelBuffer.length} bytes, sending as base64...`);
1130
+
1131
+ let tmpBody =
1132
+ {
1133
+ IDDataset: pIDDataset,
1134
+ IDSource: pIDSource,
1135
+ Format: 'excel',
1136
+ Content: tmpExcelBuffer.toString('base64')
1137
+ };
1138
+
1139
+ httpPost(pPort, '/facto/ingest/file', tmpBody,
1140
+ (pError, pResult) =>
1141
+ {
1142
+ if (pError)
1143
+ {
1144
+ return fCallback(pError);
1145
+ }
1146
+ if (pResult && pResult.Error)
1147
+ {
1148
+ return fCallback(new Error(pResult.Error));
1149
+ }
1150
+ return fCallback(null,
1151
+ {
1152
+ Ingested: (pResult && pResult.Ingested) || 0,
1153
+ Errors: (pResult && pResult.Errors) || 0,
1154
+ Total: (pResult && pResult.Total) || 0,
1155
+ DatasetVersion: pResult && pResult.DatasetVersion,
1156
+ IsDuplicate: pResult && pResult.IsDuplicate,
1157
+ ContentSignature: pResult && pResult.ContentSignature
1158
+ });
1159
+ });
1160
+ }
1161
+ catch (pError)
1162
+ {
1163
+ return fCallback(pError);
1164
+ }
1165
+ }
1166
+
1167
+ // ================================================================
1168
+ // Ingest: Direct Mode (parse + DAL)
1169
+ // ================================================================
1170
+
1171
/**
 * Ingest a downloaded dataset in direct (no-HTTP) mode: parse the raw
 * buffer with the IngestEngine parser matching pDatasetDef.format, then
 * write each parsed row as a Record (plus an 'overall' CertaintyIndex)
 * through the DAL, strictly one record at a time.
 *
 * @param {object} pFable - Fable instance exposing DAL and RetoldFactoIngestEngine
 * @param {object} pDatasetDef - dataset definition (format, certainty, agency, parseOptions, maxLines)
 * @param {Buffer} pBuffer - raw downloaded file content
 * @param {number} pIDDataset - dataset the records belong to
 * @param {number} pIDSource - source the records came from
 * @param {function} fCallback - (pError, { Ingested, Errors, Total })
 */
function ingestDataDirect(pFable, pDatasetDef, pBuffer, pIDDataset, pIDSource, fCallback)
{
	let tmpIngestEngine = pFable.RetoldFactoIngestEngine;
	let tmpFormat = pDatasetDef.format;
	let tmpCertainty = pDatasetDef.certainty;
	let tmpContent = pBuffer.toString('utf8');

	// Shared continuation for every parser branch below: receives the
	// parsed rows and drives the sequential DAL ingest.
	let tmpParseCallback = (pParseError, pParsedRecords) =>
	{
		if (pParseError)
		{
			return fCallback(pParseError);
		}

		if (!pParsedRecords || pParsedRecords.length === 0)
		{
			return fCallback(null, { Ingested: 0, Errors: 0, Total: 0 });
		}

		// Limit records if specified
		if (pDatasetDef.maxLines && pParsedRecords.length > pDatasetDef.maxLines)
		{
			pParsedRecords = pParsedRecords.slice(0, pDatasetDef.maxLines);
		}

		console.log(` Parsed: ${pParsedRecords.length} records`);

		// Ingest records sequentially
		let tmpIngested = 0;
		let tmpErrors = 0;

		// Recursive worker: record pRecordIndex, then pRecordIndex + 1, ...
		// Finishes (and reports totals) once the index passes the end.
		let tmpIngestRecord = (pRecordIndex) =>
		{
			if (pRecordIndex >= pParsedRecords.length)
			{
				return fCallback(null,
					{
						Ingested: tmpIngested,
						Errors: tmpErrors,
						Total: pParsedRecords.length
					});
			}

			let tmpRowData = pParsedRecords[pRecordIndex];
			// Non-string rows are serialized; the row payload lands in Content.
			let tmpRecordData = {
				IDDataset: pIDDataset,
				IDSource: pIDSource,
				Type: `gov-${tmpFormat}`,
				Version: 1,
				IngestDate: new Date().toISOString(),
				Content: (typeof tmpRowData === 'string') ? tmpRowData : JSON.stringify(tmpRowData)
			};

			let tmpQuery = pFable.DAL.Record.query.clone()
				.addRecord(tmpRecordData);

			pFable.DAL.Record.doCreate(tmpQuery,
				(pCreateError, pQuery, pQueryRead, pRecord) =>
				{
					if (pCreateError)
					{
						// Count the failure and keep going with the next row.
						tmpErrors++;
						return tmpIngestRecord(pRecordIndex + 1);
					}

					tmpIngested++;

					// Create certainty index
					let tmpCIQuery = pFable.DAL.CertaintyIndex.query.clone()
						.addRecord(
							{
								IDRecord: pRecord.IDRecord,
								CertaintyValue: tmpCertainty,
								Dimension: 'overall',
								Justification: `${pDatasetDef.agency} official data`
							});

					// CertaintyIndex create errors are ignored (no error
					// parameter consumed) -- best-effort metadata.
					pFable.DAL.CertaintyIndex.doCreate(tmpCIQuery,
						() =>
						{
							return tmpIngestRecord(pRecordIndex + 1);
						});
				});
		};

		tmpIngestRecord(0);
	};

	// Route to appropriate parser
	switch (tmpFormat)
	{
		case 'csv':
			tmpIngestEngine.parseCSV(tmpContent, {}, tmpParseCallback);
			break;

		case 'json':
			tmpIngestEngine.parseJSON(tmpContent, tmpParseCallback);
			break;

		case 'xml':
			tmpIngestEngine.parseXML(tmpContent, pDatasetDef.parseOptions || {}, tmpParseCallback);
			break;

		case 'tsv':
			tmpIngestEngine.parseCSV(tmpContent, pDatasetDef.parseOptions || { delimiter: '\t', stripCommentLines: true }, tmpParseCallback);
			break;

		case 'excel':
			// Special: download JSON, convert to Excel, then parse back
			ingestAsExcelDirect(pFable, pDatasetDef, tmpContent, tmpParseCallback);
			break;

		case 'fixed-width':
			tmpIngestEngine.parseFixedWidth(tmpContent, pDatasetDef.parseOptions || {}, tmpParseCallback);
			break;

		default:
			return fCallback(new Error(`Unknown format: ${tmpFormat}`));
	}
}
1291
+
1292
+ /**
1293
+ * For the Excel format demo in direct mode: parse the BLS JSON response,
1294
+ * write to a temporary .xlsx file, then read it back through parseExcel.
1295
+ */
1296
+ function ingestAsExcelDirect(pFable, pDatasetDef, pContent, fCallback)
1297
+ {
1298
+ try
1299
+ {
1300
+ let libXLSX = require('xlsx');
1301
+ let tmpJSON = JSON.parse(pContent);
1302
+
1303
+ // BLS API returns { Results: { series: [ { data: [...] } ] } }
1304
+ let tmpRecords = [];
1305
+ if (tmpJSON.Results && tmpJSON.Results.series)
1306
+ {
1307
+ for (let s = 0; s < tmpJSON.Results.series.length; s++)
1308
+ {
1309
+ let tmpSeries = tmpJSON.Results.series[s];
1310
+ if (tmpSeries.data && Array.isArray(tmpSeries.data))
1311
+ {
1312
+ tmpRecords = tmpRecords.concat(tmpSeries.data);
1313
+ }
1314
+ }
1315
+ }
1316
+
1317
+ if (tmpRecords.length === 0)
1318
+ {
1319
+ console.log(' BLS: No CPI data records found in response.');
1320
+ return fCallback(null, []);
1321
+ }
1322
+
1323
+ console.log(` BLS: Converting ${tmpRecords.length} records to Excel...`);
1324
+
1325
+ // Write to temporary .xlsx
1326
+ let tmpWorksheet = libXLSX.utils.json_to_sheet(tmpRecords);
1327
+ let tmpWorkbook = libXLSX.utils.book_new();
1328
+ libXLSX.utils.book_append_sheet(tmpWorkbook, tmpWorksheet, 'CPI Data');
1329
+
1330
+ let tmpExcelPath = libPath.join(__dirname, 'tmp-bls-cpi.xlsx');
1331
+ libXLSX.writeFile(tmpWorkbook, tmpExcelPath);
1332
+
1333
+ console.log(` BLS: Wrote temporary Excel file (${libFs.statSync(tmpExcelPath).size} bytes)`);
1334
+
1335
+ // Read it back as Buffer and parse through parseExcel
1336
+ let tmpExcelBuffer = libFs.readFileSync(tmpExcelPath);
1337
+ pFable.RetoldFactoIngestEngine.parseExcel(tmpExcelBuffer, {},
1338
+ (pParseError, pParsedRecords) =>
1339
+ {
1340
+ // Clean up temp file
1341
+ try { libFs.unlinkSync(tmpExcelPath); } catch (e) { /* ignore */ }
1342
+
1343
+ return fCallback(pParseError, pParsedRecords);
1344
+ });
1345
+ }
1346
+ catch (pError)
1347
+ {
1348
+ return fCallback(pError);
1349
+ }
1350
+ }
1351
+
1352
+ // ================================================================
1353
+ // Summary
1354
+ // ================================================================
1355
+
1356
/**
 * Print the end-of-run warehouse summary.  In HTTP mode this asks the
 * ProjectionEngine summary endpoint for the totals; in direct mode it
 * counts the core entity tables through the DAL and prints the report
 * once every count callback has returned.
 */
function printSummary(pFable, pPort)
{
	console.log('==========================================================');
	console.log(' WAREHOUSE SUMMARY');
	console.log('==========================================================');

	if (!pPort)
	{
		// Direct mode: count each core entity table via the DAL.
		let tmpCountEntities = ['Source', 'Dataset', 'DatasetSource', 'SourceDocumentation', 'Record', 'CertaintyIndex'];
		let tmpCounts = {};
		let tmpRemaining = tmpCountEntities.length;

		// Runs after every doCount callback; only prints once all are in.
		let fReportWhenComplete = () =>
		{
			if (tmpRemaining > 0)
			{
				return;
			}
			console.log(` Sources: ${tmpCounts.Source}`);
			console.log(` Datasets: ${tmpCounts.Dataset}`);
			console.log(` Dataset-Source Links: ${tmpCounts.DatasetSource}`);
			console.log(` Source Documentation: ${tmpCounts.SourceDocumentation}`);
			console.log(` Records: ${tmpCounts.Record}`);
			console.log(` Certainty Indices: ${tmpCounts.CertaintyIndex}`);
			console.log('');
			console.log(` Database: ${_DBPath}`);
			console.log('==========================================================');
			console.log(' Done!');
			console.log('');
		};

		tmpCountEntities.forEach(
			(pEntity) =>
			{
				let tmpCountQuery = pFable.DAL[pEntity].query.clone()
					.addFilter('Deleted', 0);

				pFable.DAL[pEntity].doCount(tmpCountQuery,
					(pError, pQuery, pCount) =>
					{
						// Normalize numeric-string counts from the store.
						tmpCounts[pEntity] = (typeof pCount === 'number') ? pCount : parseInt(pCount, 10) || 0;
						tmpRemaining--;
						fReportWhenComplete();
					});
			});
		return;
	}

	// HTTP mode: the ProjectionEngine summary endpoint does the counting.
	httpGet(pPort, '/facto/projections/summary',
		(pError, pSummary) =>
		{
			if (pError)
			{
				console.error(` Error fetching summary: ${pError.message}`);
				return;
			}

			console.log(` Sources: ${pSummary.Sources}`);
			console.log(` Datasets: ${pSummary.Datasets}`);
			console.log(` Records: ${pSummary.Records}`);
			console.log(` Certainty Indices: ${pSummary.CertaintyIndices}`);
			console.log(` Ingest Jobs: ${pSummary.IngestJobs}`);
			console.log('');
			if (pSummary.DatasetsByType)
			{
				console.log(' Datasets by Type:');
				for (let tmpType of Object.keys(pSummary.DatasetsByType))
				{
					console.log(` ${tmpType}: ${pSummary.DatasetsByType[tmpType]}`);
				}
			}
			console.log('');
			console.log(` Server: http://localhost:${pPort}/facto/app/`);
			console.log('==========================================================');
			console.log(' Done!');
			console.log('');
		});
}