@tmlmobilidade/import-gtfs 20251009.1357.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1 @@
+ ## Import GTFS into SQLite DB
package/dist/index.d.ts ADDED
@@ -0,0 +1,9 @@
+ import { type GtfsSQLTables, type ImportGtfsToDatabaseConfig } from './src/types.js';
+ import { type Plan } from '@tmlmobilidade/types';
+ /**
+  * Imports GTFS data into the database for a given plan.
+  * @param plan The plan containing GTFS feed information.
+  * @param config Optional configuration for the import process.
+  * @returns A promise that resolves to the imported GTFS SQL tables.
+  */
+ export declare function importGtfsToDatabase(plan: Plan, config?: ImportGtfsToDatabaseConfig): Promise<GtfsSQLTables>;
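For orientation, the package's public surface is just this one function. A minimal usage sketch, not part of the published package — the signature and config fields come from the declarations in this diff, while how the `plan` value is obtained and the date literals are assumptions:

```js
import { importGtfsToDatabase } from '@tmlmobilidade/import-gtfs';

// Hypothetical: a Plan document obtained from wherever plans are stored.
const plan = await getPlanSomehow();

// start_date / end_date are optional and narrow the imported service window;
// they default to the plan's feed_start_date / feed_end_date (see dist/index.js).
// The literals assume OperationalDate has a YYYYMMDD shape.
const tables = await importGtfsToDatabase(plan, {
    end_date: '20250331',
    start_date: '20250201',
});

console.log(tables.calendar_dates.size); // Map of service_id -> OperationalDate[]
```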
package/dist/index.js ADDED
@@ -0,0 +1,65 @@
+ /* * */
+ import { processCalendarFile } from './src/processors/calendar.js';
+ import { processCalendarDatesFile } from './src/processors/calendar_dates.js';
+ import { processRoutesFile } from './src/processors/routes.js';
+ import { processShapesFile } from './src/processors/shapes.js';
+ import { processStopTimesFile } from './src/processors/stop_times.js';
+ import { processStopsFile } from './src/processors/stops.js';
+ import { processTripsFile } from './src/processors/trips.js';
+ import { downloadAndExtractGtfs } from './src/utils/extract-file.js';
+ import { initGtfsSqlTables } from './src/utils/init-tables.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Imports GTFS data into the database for a given plan.
+  * @param plan The plan containing GTFS feed information.
+  * @param config Optional configuration for the import process.
+  * @returns A promise that resolves to the imported GTFS SQL tables.
+  */
+ export async function importGtfsToDatabase(plan, config = {}) {
+     try {
+         //
+         const globalTimer = new TIMETRACKER();
+         Logs.info(`Importing ${plan._id} GTFS to database...`);
+         //
+         // Initialize context for the current plan
+         const context = {
+             counters: {
+                 calendar_dates: 0,
+                 hashed_shapes: 0,
+                 hashed_trips: 0,
+                 shapes: 0,
+                 stop_times: 0,
+                 trips: 0,
+             },
+             gtfs: initGtfsSqlTables(),
+             plan: plan,
+             referenced_route_ids: new Set(),
+             referenced_shape_ids: new Set(),
+             workdir: await downloadAndExtractGtfs(plan),
+         };
+         //
+         // Validate GTFS feed info
+         if (!plan.gtfs_feed_info?.feed_start_date || !plan.gtfs_feed_info?.feed_end_date) {
+             throw new Error(`Plan "${plan._id}" is missing GTFS feed start and/or end date.`);
+         }
+         //
+         // Process GTFS files in dependency order: calendars, then trips, then the entities trips reference (routes, shapes, stops), and finally stop_times
+         await processCalendarFile(context, config.start_date ?? plan.gtfs_feed_info.feed_start_date, config.end_date ?? plan.gtfs_feed_info.feed_end_date);
+         await processCalendarDatesFile(context, config.start_date ?? plan.gtfs_feed_info.feed_start_date, config.end_date ?? plan.gtfs_feed_info.feed_end_date);
+         await processTripsFile(context);
+         await processRoutesFile(context);
+         await processShapesFile(context);
+         await processStopsFile(context);
+         await processStopTimesFile(context);
+         Logs.success(`Finished importing GTFS to database for plan "${plan._id}" in ${globalTimer.get()}.`, 0);
+         Logs.divider();
+         Logs.terminate(`Finished importing GTFS to database in ${globalTimer.get()}.`);
+         return context.gtfs;
+         //
+     }
+     catch (error) {
+         Logs.error('Error parsing plan.', error);
+         throw error;
+     }
+ }
package/dist/src/processors/calendar.d.ts ADDED
@@ -0,0 +1,11 @@
+ import { type ImportGtfsContext } from '../types.js';
+ import { OperationalDate } from '@tmlmobilidade/types';
+ /**
+  * Processes the calendar.txt file from the GTFS dataset.
+  * It extracts service_ids that are valid between the given start_date and end_date,
+  * and populates the context's calendar_dates map with operational dates for each service_id.
+  * @param context The import GTFS context to populate with calendar dates.
+  * @param startDate The start date of the range to filter service_ids.
+  * @param endDate The end date of the range to filter service_ids.
+  */
+ export declare function processCalendarFile(context: ImportGtfsContext, startDate: OperationalDate, endDate: OperationalDate): Promise<void>;
package/dist/src/processors/calendar.js ADDED
@@ -0,0 +1,80 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsCalendar } from '@tmlmobilidade/types';
+ import { Dates, getOperationalDatesFromRange, Logs } from '@tmlmobilidade/utils';
+ import fs from 'node:fs';
+ /**
+  * Processes the calendar.txt file from the GTFS dataset.
+  * It extracts service_ids that are valid between the given start_date and end_date,
+  * and populates the context's calendar_dates map with operational dates for each service_id.
+  * @param context The import GTFS context to populate with calendar dates.
+  * @param startDate The start date of the range to filter service_ids.
+  * @param endDate The end date of the range to filter service_ids.
+  */
+ export async function processCalendarFile(context, startDate, endDate) {
+     try {
+         //
+         const calendarParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "calendar.txt"...`);
+         const parseEachRow = async (data) => {
+             //
+             //
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsCalendar(data);
+             //
+             // Check if this service_id is between the given start_date and end_date.
+             // Clip the service_id's start and end dates to the given start and end dates.
+             let serviceIdStartDate = validatedData.start_date;
+             let serviceIdEndDate = validatedData.end_date;
+             if (serviceIdEndDate < startDate || serviceIdStartDate > endDate)
+                 return;
+             if (serviceIdStartDate < startDate)
+                 serviceIdStartDate = startDate;
+             if (serviceIdEndDate > endDate)
+                 serviceIdEndDate = endDate;
+             //
+             // If we're here, it means the service_id is valid between the given dates.
+             // For the configured weekly schedule, create the individual operational dates
+             // for each day of the week that is active.
+             const allOperationalDatesInRange = getOperationalDatesFromRange(serviceIdStartDate, serviceIdEndDate);
+             const validOperationalDates = new Set();
+             for (const currentDate of allOperationalDatesInRange) {
+                 const dayOfWeek = Dates.fromOperationalDate(currentDate, 'Europe/Lisbon').toFormat('c');
+                 if (dayOfWeek === '1' && validatedData.monday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '2' && validatedData.tuesday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '3' && validatedData.wednesday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '4' && validatedData.thursday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '5' && validatedData.friday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '6' && validatedData.saturday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '7' && validatedData.sunday === 1)
+                     validOperationalDates.add(currentDate);
+             }
+             //
+             // Save the valid operational dates for this service_id
+             context.gtfs.calendar_dates.set(validatedData.service_id, Array.from(validOperationalDates));
+             context.counters.calendar_dates += validOperationalDates.size;
+             //
+         };
+         //
+         // Setup the CSV parsing operation only if the file exists
+         if (fs.existsSync(`${context.workdir.extract_dir_path}/calendar.txt`)) {
+             await parseCsvFile(`${context.workdir.extract_dir_path}/calendar.txt`, parseEachRow);
+             Logs.success(`Finished processing "calendar.txt": ${context.gtfs.calendar_dates.size} rows saved in ${calendarParseTimer.get()}.`, 1);
+         }
+         else {
+             Logs.info(`Optional file "calendar.txt" not found. This may or may not be an error. Proceeding...`, 1);
+         }
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "calendar.txt" file.', error);
+         throw new Error('✖︎ Error processing "calendar.txt" file.');
+     }
+ }
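The clipping step above is compact, so here is a stand-alone reduction of just that logic — a hypothetical helper, not part of the package, assuming OperationalDate values order correctly under `<`/`>` (e.g. YYYYMMDD strings), which the comparisons above already rely on:

```js
// Hypothetical reduction of the range handling in processCalendarFile:
// a service window entirely outside [startDate, endDate] is skipped,
// and a partially overlapping one is clipped to the overlap.
function clipServiceRange(svcStart, svcEnd, startDate, endDate) {
    if (svcEnd < startDate || svcStart > endDate) return null; // no overlap: row is skipped
    return [
        svcStart < startDate ? startDate : svcStart, // clip the left edge
        svcEnd > endDate ? endDate : svcEnd, // clip the right edge
    ];
}

clipServiceRange('20250110', '20250420', '20250201', '20250331'); // ['20250201', '20250331']
clipServiceRange('20240101', '20240131', '20250201', '20250331'); // null
```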
package/dist/src/processors/calendar_dates.d.ts ADDED
@@ -0,0 +1,11 @@
+ import { type ImportGtfsContext } from '../types.js';
+ import { OperationalDate } from '@tmlmobilidade/types';
+ /**
+  * Processes the calendar_dates.txt file from the GTFS dataset.
+  * It extracts service_ids that are valid between the given start_date and end_date,
+  * and updates the context's calendar_dates map with the new dates.
+  * @param context The import GTFS context to populate with calendar dates.
+  * @param startDate The start date of the range to filter service_ids.
+  * @param endDate The end date of the range to filter service_ids.
+  */
+ export declare function processCalendarDatesFile(context: ImportGtfsContext, startDate: OperationalDate, endDate: OperationalDate): Promise<void>;
package/dist/src/processors/calendar_dates.js ADDED
@@ -0,0 +1,74 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsCalendarDate } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ import fs from 'node:fs';
+ /**
+  * Processes the calendar_dates.txt file from the GTFS dataset.
+  * It extracts service_ids that are valid between the given start_date and end_date,
+  * and updates the context's calendar_dates map with the new dates.
+  * @param context The import GTFS context to populate with calendar dates.
+  * @param startDate The start date of the range to filter service_ids.
+  * @param endDate The end date of the range to filter service_ids.
+  */
+ export async function processCalendarDatesFile(context, startDate, endDate) {
+     try {
+         //
+         const calendarDatesParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "calendar_dates.txt"...`);
+         const parseEachRow = async (data) => {
+             //
+             //
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsCalendarDate(data);
+             //
+             // Skip if this row's date is not between the given start and end dates
+             if (validatedData.date < startDate || validatedData.date > endDate)
+                 return;
+             //
+             // If we're here, it means the service_id is valid between the given dates.
+             // Get the previously saved calendar, if one exists for this service_id.
+             const savedCalendar = context.gtfs.calendar_dates.get(validatedData.service_id);
+             if (savedCalendar) {
+                 // Create a new Set to avoid duplicated dates
+                 const updatedCalendar = new Set(savedCalendar);
+                 // If this service_id was previously saved, either add or remove the current
+                 // date based on the exception_type value for this row.
+                 if (validatedData.exception_type === 1) {
+                     updatedCalendar.add(validatedData.date);
+                     context.counters.calendar_dates++;
+                 }
+                 else if (validatedData.exception_type === 2) {
+                     updatedCalendar.delete(validatedData.date);
+                     context.counters.calendar_dates--;
+                 }
+                 // Update the service_id with the new dates
+                 context.gtfs.calendar_dates.set(validatedData.service_id, Array.from(updatedCalendar));
+             }
+             else {
+                 // If this is the first time we're seeing this service_id, then it is only necessary
+                 // to initialize a new dates array if it is a service addition
+                 if (validatedData.exception_type === 1) {
+                     context.gtfs.calendar_dates.set(validatedData.service_id, [validatedData.date]);
+                     context.counters.calendar_dates++;
+                 }
+             }
+             //
+         };
+         //
+         // Setup the CSV parsing operation only if the file exists
+         if (fs.existsSync(`${context.workdir.extract_dir_path}/calendar_dates.txt`)) {
+             await parseCsvFile(`${context.workdir.extract_dir_path}/calendar_dates.txt`, parseEachRow);
+             Logs.success(`Finished processing "calendar_dates.txt": ${context.gtfs.calendar_dates.size} rows saved in ${calendarDatesParseTimer.get()}.`, 1);
+         }
+         else {
+             Logs.info(`Optional file "calendar_dates.txt" not found. This may or may not be an error. Proceeding...`, 1);
+         }
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "calendar_dates.txt" file.', error);
+         throw new Error('✖︎ Error processing "calendar_dates.txt" file.');
+     }
+ }
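The exception_type branch above follows the GTFS spec: type 1 adds a service date, type 2 removes one. A hypothetical stand-alone reduction mirroring the code above (not an API of this package):

```js
// exception_type 1 adds the date to the service, 2 removes it.
function applyCalendarDateException(dates, row) {
    const updated = new Set(dates); // a Set avoids duplicated dates
    if (row.exception_type === 1) updated.add(row.date);
    else if (row.exception_type === 2) updated.delete(row.date);
    return Array.from(updated);
}

applyCalendarDateException(['20250101'], { date: '20250102', exception_type: 1 }); // ['20250101', '20250102']
applyCalendarDateException(['20250101'], { date: '20250101', exception_type: 2 }); // []
```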
package/dist/src/processors/routes.d.ts ADDED
@@ -0,0 +1,7 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the routes.txt file from the GTFS dataset.
+  * It filters routes based on the previously saved trips.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processRoutesFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/routes.js ADDED
@@ -0,0 +1,37 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsRouteExtended } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the routes.txt file from the GTFS dataset.
+  * It filters routes based on the previously saved trips.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processRoutesFile(context) {
+     try {
+         //
+         const routesParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "routes.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsRouteExtended(data);
+             // For each route, only save the ones referenced
+             // by the previously saved trips.
+             if (!context.referenced_route_ids.has(validatedData.route_id))
+                 return;
+             // Save the exported row
+             context.gtfs.routes.write(validatedData);
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/routes.txt`, parseEachRow);
+         context.gtfs.routes.flush();
+         Logs.success(`Finished processing "routes.txt": ${context.gtfs.routes.size} rows saved in ${routesParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "routes.txt" file.', error);
+         throw new Error('✖︎ Error processing "routes.txt" file.');
+     }
+ }
package/dist/src/processors/shapes.d.ts ADDED
@@ -0,0 +1,7 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the shapes.txt file from the GTFS dataset.
+  * Includes only the shapes referenced by the previously saved trips.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processShapesFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/shapes.js ADDED
@@ -0,0 +1,42 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsShape } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the shapes.txt file from the GTFS dataset.
+  * Includes only the shapes referenced by the previously saved trips.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processShapesFile(context) {
+     try {
+         //
+         const shapesParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "shapes.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsShape(data);
+             // For each shape, only save the ones referenced
+             // by the previously saved trips.
+             if (!context.referenced_shape_ids.has(validatedData.shape_id))
+                 return;
+             // Save the exported row
+             context.gtfs.shapes.write(validatedData);
+             // Log progress
+             if (context.counters.shapes % 100000 === 0)
+                 Logs.info(`Parsed ${context.counters.shapes} shapes.txt rows so far.`);
+             // Increment the counter
+             context.counters.shapes++;
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/shapes.txt`, parseEachRow);
+         context.gtfs.shapes.flush();
+         Logs.success(`Finished processing "shapes.txt": ${context.gtfs.shapes.size} rows saved in ${shapesParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "shapes.txt" file.', error);
+         throw new Error('✖︎ Error processing "shapes.txt" file.');
+     }
+ }
package/dist/src/processors/stop_times.d.ts ADDED
@@ -0,0 +1,9 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the stop_times.txt file from the GTFS dataset.
+  * Only includes the stop_times for trips referenced before.
+  * Since this is the most resource-intensive operation of them all,
+  * the associated stop data is included right away to avoid another lookup later.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processStopTimesFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/stop_times.js ADDED
@@ -0,0 +1,48 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsStopTime } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the stop_times.txt file from the GTFS dataset.
+  * Only includes the stop_times for trips referenced before.
+  * Since this is the most resource-intensive operation of them all,
+  * the associated stop data is included right away to avoid another lookup later.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processStopTimesFile(context) {
+     try {
+         //
+         const stopTimesParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "stop_times.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsStopTime(data);
+             // Skip if this row's trip_id was not saved before.
+             const tripData = context.gtfs.trips.get('trip_id', validatedData.trip_id);
+             if (!tripData)
+                 return;
+             // Also, check if the stop_id is valid and was saved before.
+             const stopData = context.gtfs.stops.get('stop_id', validatedData.stop_id);
+             if (!stopData)
+                 return;
+             // Save the exported row
+             context.gtfs.stop_times.write(validatedData);
+             // Log progress
+             if (context.counters.stop_times % 100000 === 0)
+                 Logs.info(`Parsed ${context.counters.stop_times} stop_times.txt rows so far.`);
+             // Increment the counter
+             context.counters.stop_times++;
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/stop_times.txt`, parseEachRow);
+         context.gtfs.stop_times.flush();
+         Logs.success(`Finished processing "stop_times.txt": ${context.counters.stop_times} rows saved in ${stopTimesParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "stop_times.txt" file.', error);
+         throw new Error('✖︎ Error processing "stop_times.txt" file.');
+     }
+ }
package/dist/src/processors/stops.d.ts ADDED
@@ -0,0 +1,8 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the stops.txt file from the GTFS dataset.
+  * Includes all stops, since there is no way to filter them yet (unlike trips, routes, and shapes).
+  * Saving all of them also speeds up the processing of each stop_time by including the stop data right away.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processStopsFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/stops.js ADDED
@@ -0,0 +1,37 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsStopExtended } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the stops.txt file from the GTFS dataset.
+  * Includes all stops, since there is no way to filter them yet (unlike trips, routes, and shapes).
+  * Saving all of them also speeds up the processing of each stop_time by including the stop data right away.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processStopsFile(context) {
+     try {
+         //
+         const stopsParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "stops.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsStopExtended(data);
+             // Skip if stop already exists
+             if (context.gtfs.stops.get('stop_id', validatedData.stop_id))
+                 return;
+             // Save the exported row
+             context.gtfs.stops.write(validatedData);
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/stops.txt`, parseEachRow);
+         context.gtfs.stops.flush();
+         Logs.success(`Finished processing "stops.txt": ${context.gtfs.stops.size} rows saved in ${stopsParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "stops.txt" file.', error);
+         throw new Error('✖︎ Error processing "stops.txt" file.');
+     }
+ }
package/dist/src/processors/trips.d.ts ADDED
@@ -0,0 +1,7 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the trips.txt file from the GTFS dataset.
+  * It filters trips based on the previously saved calendar dates.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processTripsFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/trips.js ADDED
@@ -0,0 +1,45 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsTripExtended } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the trips.txt file from the GTFS dataset.
+  * It filters trips based on the previously saved calendar dates.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processTripsFile(context) {
+     try {
+         //
+         const tripsParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "trips.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsTripExtended(data);
+             // For each trip, check if the associated service_id was saved
+             // in the previous step or not. Include it if yes, skip otherwise.
+             if (!context.gtfs.calendar_dates.has(validatedData.service_id))
+                 return;
+             // Save the exported row
+             context.gtfs.trips.write(validatedData);
+             // Reference the associated entities to filter them later.
+             context.referenced_route_ids.add(validatedData.route_id);
+             context.referenced_shape_ids.add(validatedData.shape_id);
+             // Log progress
+             if (context.counters.trips % 10000 === 0)
+                 Logs.info(`Parsed ${context.counters.trips} trips.txt rows so far.`);
+             // Increment the counter
+             context.counters.trips++;
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/trips.txt`, parseEachRow);
+         context.gtfs.trips.flush();
+         Logs.success(`Finished processing "trips.txt": ${context.gtfs.trips.size} rows saved in ${tripsParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "trips.txt" file.', error);
+         throw new Error('✖︎ Error processing "trips.txt" file.');
+     }
+ }
package/dist/src/types.d.ts ADDED
@@ -0,0 +1,47 @@
+ import { SQLiteWriter } from '@tmlmobilidade/connectors';
+ import { type GTFS_Route_Extended, type GTFS_Shape, type GTFS_Stop_Extended, type GTFS_StopTime, type GTFS_Trip_Extended, type Plan } from '@tmlmobilidade/types';
+ import { type OperationalDate } from '@tmlmobilidade/types';
+ /**
+  * Configuration options for importing GTFS data into a database.
+  */
+ export interface ImportGtfsToDatabaseConfig {
+     end_date?: OperationalDate;
+     start_date?: OperationalDate;
+ }
+ /**
+  * Holds references to all GTFS-related SQL tables and writers.
+  * Each property corresponds to a specific GTFS entity and is associated
+  * with a SQLiteWriter instance for that entity.
+  */
+ export interface GtfsSQLTables {
+     calendar_dates: Map<string, OperationalDate[]>;
+     routes: SQLiteWriter<GTFS_Route_Extended>;
+     shapes: SQLiteWriter<GTFS_Shape>;
+     stop_times: SQLiteWriter<GTFS_StopTime>;
+     stops: SQLiteWriter<GTFS_Stop_Extended>;
+     trips: SQLiteWriter<GTFS_Trip_Extended>;
+ }
+ /**
+  * Context object used throughout the GTFS import process.
+  * It contains counters for various entities, references to GTFS SQL tables,
+  * the original plan metadata, sets of referenced IDs, and paths for working directories.
+  */
+ export interface ImportGtfsContext {
+     counters: {
+         calendar_dates: number;
+         hashed_shapes: number;
+         hashed_trips: number;
+         shapes: number;
+         stop_times: number;
+         trips: number;
+     };
+     gtfs: GtfsSQLTables;
+     plan: Plan;
+     referenced_route_ids: Set<string>;
+     referenced_shape_ids: Set<string>;
+     workdir: {
+         download_file_path: string;
+         extract_dir_path: string;
+         path: string;
+     };
+ }
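A sketch of consuming the returned GtfsSQLTables. Only the SQLiteWriter members this package itself exercises — write(), flush(), get(), and size — appear here; the trip id is a hypothetical placeholder:

```js
// `plan` as in the earlier sketch; `tables` matches the GtfsSQLTables interface above.
const tables = await importGtfsToDatabase(plan);

// calendar_dates is a plain Map of service_id -> OperationalDate[].
for (const [serviceId, dates] of tables.calendar_dates) {
    console.log(serviceId, dates.length);
}

// The other tables are SQLiteWriter instances. Elsewhere in this package,
// get(column, value) looks a row up by column value — e.g. in stop_times.js:
const trip = tables.trips.get('trip_id', 'TRIP_XYZ'); // 'TRIP_XYZ' is a placeholder
```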
package/dist/src/types.js ADDED
@@ -0,0 +1,2 @@
+ /* * */
+ export {};
package/dist/src/utils/extract-file.d.ts ADDED
@@ -0,0 +1,8 @@
+ import { type ImportGtfsContext } from '../types.js';
+ import { type Plan } from '@tmlmobilidade/types';
+ /**
+  * Downloads and extracts the GTFS files for the given plan.
+  * @param plan The plan containing the operation_file_id to download and extract.
+  * @returns The working directory containing the extracted GTFS files.
+  */
+ export declare function downloadAndExtractGtfs(plan: Plan): Promise<ImportGtfsContext['workdir']>;
package/dist/src/utils/extract-file.js ADDED
@@ -0,0 +1,63 @@
+ /* * */
+ import { unzipFile } from './unzip-file.js';
+ import { files } from '@tmlmobilidade/interfaces';
+ import { Logs } from '@tmlmobilidade/utils';
+ import fs from 'node:fs';
+ /**
+  * Downloads and extracts the GTFS files for the given plan.
+  * @param plan The plan containing the operation_file_id to download and extract.
+  * @returns The working directory containing the extracted GTFS files.
+  */
+ export async function downloadAndExtractGtfs(plan) {
+     //
+     // Abort early if the plan has no operation file
+     if (!plan.operation_file_id) {
+         Logs.error(`No operation file found for plan "${plan._id}".`);
+         process.exit(1);
+     }
+     //
+     // Prepare the working directory
+     const workdirPath = `/tmp/${plan._id}`;
+     const downloadFilePath = `${workdirPath}/${plan.operation_file_id}.zip`;
+     const extractDirPath = `${workdirPath}/extracted`;
+     try {
+         fs.rmSync(workdirPath, { force: true, recursive: true });
+         fs.mkdirSync(workdirPath, { recursive: true });
+         Logs.success('Prepared working directory.', 1);
+     }
+     catch (error) {
+         Logs.error(`Error preparing workdir path "${workdirPath}".`, error);
+         process.exit(1);
+     }
+     //
+     // Get the associated Operation GTFS archive URL,
+     // and try to download, save and unzip it.
+     const operationFileData = await files.findById(plan.operation_file_id);
+     if (!operationFileData || !operationFileData.url) {
+         Logs.error(`No operation file found for plan "${plan._id}".`);
+         process.exit(1);
+     }
+     try {
+         const downloadResponse = await fetch(operationFileData.url);
+         const downloadArrayBuffer = await downloadResponse.arrayBuffer();
+         fs.writeFileSync(downloadFilePath, Buffer.from(downloadArrayBuffer));
+     }
+     catch (error) {
+         Logs.error('Error downloading the file.', error);
+         process.exit(1);
+     }
+     try {
+         await unzipFile(downloadFilePath, extractDirPath);
+         Logs.success(`Unzipped GTFS file from "${downloadFilePath}" to "${extractDirPath}".`, 1);
+     }
+     catch (error) {
+         Logs.error('Error unzipping the file.', error);
+         process.exit(1);
+     }
+     return {
+         download_file_path: downloadFilePath,
+         extract_dir_path: extractDirPath,
+         path: workdirPath,
+     };
+     //
+ }
@@ -0,0 +1,2 @@
+ import { type OperationalDate, type UnixTimestamp } from '@tmlmobilidade/types';
+ export declare const convertGTFSTimeStringAndOperationalDateToUnixTimestamp: (timeString: string, operationalDate: OperationalDate) => UnixTimestamp;
@@ -0,0 +1,19 @@
+ /* * */
+ import { Dates } from '@tmlmobilidade/utils';
+ /* * */
+ export const convertGTFSTimeStringAndOperationalDateToUnixTimestamp = (timeString, operationalDate) => {
+     //
+     // Throw early if no time string or operational date is provided
+     if (!timeString || !operationalDate)
+         throw new Error(`✖︎ No time string or operational date provided. timeString: ${timeString}, operationalDate: ${operationalDate}`);
+     // Check if the timestring is in the format HH:MM:SS
+     if (!/^\d{2}:\d{2}:\d{2}$/.test(timeString))
+         throw new Error(`✖︎ Invalid time string format. timeString: ${timeString}`);
+     // Extract the individual components of the time string (HH:MM:SS)
+     const [hoursOperation, minutesOperation, secondsOperation] = timeString.split(':').map(Number);
+     return Dates
+         .fromOperationalDate(operationalDate, 'Europe/Lisbon')
+         .set({ hour: hoursOperation, minute: minutesOperation, second: secondsOperation })
+         .unix_timestamp;
+     //
+ };
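One caveat worth noting: the GTFS spec allows stop times of 24:00:00 and beyond for trips running past midnight of the service day, and the HH:MM:SS regex above accepts such values (e.g. "25:30:00"); how they resolve depends on the Dates helper from @tmlmobilidade/utils, which this diff does not include. A hedged usage sketch:

```js
// The date literal assumes a YYYYMMDD OperationalDate; behavior for
// hours >= 24 depends on the (unshown) Dates helper and is not asserted here.
const ts = convertGTFSTimeStringAndOperationalDateToUnixTimestamp('08:30:00', '20250301');

// Malformed strings throw before any date math happens:
// convertGTFSTimeStringAndOperationalDateToUnixTimestamp('8:30', '20250301');
// -> Error: ✖︎ Invalid time string format. timeString: 8:30
```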
package/dist/src/utils/init-tables.d.ts ADDED
@@ -0,0 +1,6 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Initializes GTFS SQL tables and writers.
+  * @returns An object containing initialized GTFS SQL tables and writers.
+  */
+ export declare function initGtfsSqlTables(): ImportGtfsContext['gtfs'];
package/dist/src/utils/init-tables.js ADDED
@@ -0,0 +1,120 @@
+ /* * */
+ import { SQLiteWriter } from '@tmlmobilidade/connectors';
+ /**
+  * Initializes GTFS SQL tables and writers.
+  * @returns An object containing initialized GTFS SQL tables and writers.
+  */
+ export function initGtfsSqlTables() {
+     //
+     const calendarDatesMap = new Map();
+     const tripsWriter = new SQLiteWriter({
+         batch_size: 10000,
+         columns: [
+             { indexed: true, name: 'trip_id', not_null: true, primary_key: true, type: 'TEXT' },
+             { indexed: false, name: 'bikes_allowed', type: 'INTEGER' },
+             { indexed: false, name: 'block_id', type: 'TEXT' },
+             { indexed: false, name: 'direction_id', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'route_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'service_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'shape_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'trip_headsign', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'trip_short_name', type: 'TEXT' },
+             { indexed: false, name: 'wheelchair_accessible', type: 'INTEGER' },
+             { indexed: false, name: 'pattern_id', not_null: true, type: 'TEXT' },
+         ],
+     });
+     const routesWriter = new SQLiteWriter({
+         batch_size: 10000,
+         columns: [
+             { indexed: false, name: 'agency_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'continuous_drop_off', type: 'INTEGER' },
+             { indexed: false, name: 'continuous_pickup', type: 'INTEGER' },
+             { indexed: false, name: 'route_color', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'route_desc', type: 'TEXT' },
+             { indexed: true, name: 'route_id', not_null: true, primary_key: true, type: 'TEXT' },
+             { indexed: false, name: 'route_long_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'route_short_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'route_sort_order', type: 'INTEGER' },
+             { indexed: false, name: 'route_text_color', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'route_type', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'route_url', type: 'TEXT' },
+             { indexed: false, name: 'circular', type: 'INTEGER' },
+             { indexed: false, name: 'line_id', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'line_long_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'line_short_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'path_type', type: 'INTEGER' },
+             { indexed: false, name: 'route_remarks', type: 'TEXT' },
+             { indexed: false, name: 'school', type: 'INTEGER' },
+         ],
+     });
+     const shapesWriter = new SQLiteWriter({
+         batch_size: 100000,
+         columns: [
+             { indexed: true, name: 'shape_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'shape_pt_lat', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'shape_pt_lon', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'shape_pt_sequence', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'shape_dist_traveled', not_null: true, type: 'REAL' },
+         ],
+     });
+     const stopsWriter = new SQLiteWriter({
+         batch_size: 10000,
+         columns: [
+             { indexed: false, name: 'level_id', type: 'TEXT' },
+             { indexed: false, name: 'location_type', type: 'INTEGER' },
+             { indexed: false, name: 'parent_station', type: 'TEXT' },
+             { indexed: false, name: 'platform_code', type: 'TEXT' },
+             { indexed: false, name: 'stop_code', type: 'TEXT' },
+             { indexed: false, name: 'stop_desc', type: 'TEXT' },
+             { indexed: true, name: 'stop_id', not_null: true, primary_key: true, type: 'TEXT' },
+             { indexed: false, name: 'stop_lat', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'stop_lon', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'stop_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'stop_timezone', type: 'TEXT' },
+             { indexed: false, name: 'stop_url', type: 'TEXT' },
+             { indexed: false, name: 'wheelchair_boarding', type: 'INTEGER' },
+             { indexed: false, name: 'zone_id', type: 'TEXT' },
+             { indexed: false, name: 'has_bench', type: 'INTEGER' },
+             { indexed: false, name: 'has_network_map', type: 'INTEGER' },
+             { indexed: false, name: 'has_pip_real_time', type: 'INTEGER' },
+             { indexed: false, name: 'has_schedules', type: 'INTEGER' },
+             { indexed: false, name: 'has_shelter', type: 'INTEGER' },
+             { indexed: false, name: 'has_stop_sign', type: 'INTEGER' },
+             { indexed: false, name: 'has_tariffs_information', type: 'INTEGER' },
+             { indexed: false, name: 'municipality_id', type: 'TEXT' },
+             { indexed: false, name: 'parish_id', type: 'TEXT' },
+             { indexed: false, name: 'public_visible', type: 'INTEGER' },
+             { indexed: false, name: 'region_id', type: 'TEXT' },
+             { indexed: false, name: 'shelter_code', type: 'TEXT' },
+             { indexed: false, name: 'shelter_maintainer', type: 'TEXT' },
+             { indexed: false, name: 'stop_short_name', type: 'TEXT' },
+             { indexed: false, name: 'tts_stop_name', type: 'TEXT' },
+         ],
+     });
+     const stopTimesWriter = new SQLiteWriter({
+         batch_size: 100000,
+         columns: [
+             { indexed: false, name: 'arrival_time', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'continuous_drop_off', type: 'INTEGER' },
+             { indexed: false, name: 'continuous_pickup', type: 'INTEGER' },
+             { indexed: false, name: 'departure_time', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'drop_off_type', type: 'INTEGER' },
+             { indexed: false, name: 'pickup_type', type: 'INTEGER' },
+             { indexed: false, name: 'shape_dist_traveled', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'stop_headsign', type: 'TEXT' },
+             { indexed: true, name: 'stop_id', not_null: true, type: 'TEXT' },
+             { indexed: true, name: 'trip_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'stop_sequence', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'timepoint', type: 'INTEGER' },
+         ],
+     });
+     return {
+         calendar_dates: calendarDatesMap,
+         routes: routesWriter,
+         shapes: shapesWriter,
+         stop_times: stopTimesWriter,
+         stops: stopsWriter,
+         trips: tripsWriter,
+     };
+     //
+ }
package/dist/src/utils/parse-csv.d.ts ADDED
@@ -0,0 +1 @@
+ export declare function parseCsvFile(filePath: string, rowParser: (rowData: any) => Promise<void>): Promise<void>;
package/dist/src/utils/parse-csv.js ADDED
@@ -0,0 +1,13 @@
+ /* * */
+ import { parse as csvParser } from 'csv-parse';
+ import fs from 'fs';
+ /* * */
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ export async function parseCsvFile(filePath, rowParser) {
+     const parser = csvParser({ bom: true, columns: true, record_delimiter: ['\n', '\r', '\r\n'], skip_empty_lines: true, trim: true });
+     const fileStream = fs.createReadStream(filePath);
+     const stream = fileStream.pipe(parser);
+     for await (const rowData of stream) {
+         await rowParser(rowData);
+     }
+ }
package/dist/src/utils/unzip-file.d.ts ADDED
@@ -0,0 +1,2 @@
+ export declare const unzipFile: (zipFilePath: any, outputDir: any) => Promise<void>;
+ export declare const setDirectoryPermissions: (dirPath: any, mode?: number) => void;
package/dist/src/utils/unzip-file.js ADDED
@@ -0,0 +1,21 @@
+ /* * */
+ import extract from 'extract-zip';
+ import fs from 'fs';
+ /* * */
+ export const unzipFile = async (zipFilePath, outputDir) => {
+     await extract(zipFilePath, { dir: outputDir });
+     setDirectoryPermissions(outputDir);
+ };
+ /* * */
+ export const setDirectoryPermissions = (dirPath, mode = 0o666) => {
+     const files = fs.readdirSync(dirPath, { withFileTypes: true });
+     for (const file of files) {
+         const filePath = `${dirPath}/${file.name}`;
+         if (file.isDirectory()) {
+             setDirectoryPermissions(filePath, mode);
+         }
+         else {
+             fs.chmodSync(filePath, mode);
+         }
+     }
+ };
package/package.json ADDED
@@ -0,0 +1,54 @@
+ {
+   "name": "@tmlmobilidade/import-gtfs",
+   "version": "20251009.1357.48",
+   "author": "João de Vasconcelos & Jusi Monteiro",
+   "license": "AGPL-3.0-or-later",
+   "homepage": "https://github.com/tmlmobilidade/services#readme",
+   "bugs": {
+     "url": "https://github.com/tmlmobilidade/services/issues"
+   },
+   "repository": {
+     "type": "git",
+     "url": "git+https://github.com/tmlmobilidade/services.git"
+   },
+   "keywords": [
+     "public transit",
+     "tml",
+     "transportes metropolitanos de lisboa",
+     "services"
+   ],
+   "publishConfig": {
+     "access": "public"
+   },
+   "type": "module",
+   "files": [
+     "dist/"
+   ],
+   "exports": {
+     ".": {
+       "types": "./dist/index.d.ts",
+       "default": "./dist/index.js"
+     }
+   },
+   "scripts": {
+     "build": "rimraf ./dist && tsc && resolve-tspaths",
+     "dev": "email dev -p 3001 --dir ./src/emails",
+     "export": "email export --dir ./src/emails",
+     "lint": "eslint ./src/ && tsc --noEmit"
+   },
+   "dependencies": {
+     "@tmlmobilidade/connectors": "*",
+     "@tmlmobilidade/interfaces": "*",
+     "@tmlmobilidade/utils": "*"
+   },
+   "devDependencies": {
+     "@carrismetropolitana/eslint": "20250622.1204.50",
+     "@tmlmobilidade/lib": "*",
+     "@tmlmobilidade/tsconfig": "*",
+     "@tmlmobilidade/types": "*",
+     "@types/node": "24.7.0",
+     "resolve-tspaths": "0.8.23",
+     "rimraf": "6.0.1",
+     "typescript": "5.9.3"
+   }
+ }