@tmlmobilidade/import-gtfs 20251009.1357.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1 @@
+ ## Import GTFS into SQLite DB
package/dist/index.d.ts ADDED
@@ -0,0 +1,9 @@
+ import { type GtfsSQLTables, type ImportGtfsToDatabaseConfig } from './src/types.js';
+ import { type Plan } from '@tmlmobilidade/types';
+ /**
+  * Imports GTFS data into the database for a given plan.
+  * @param plan The plan containing GTFS feed information.
+  * @param config Optional configuration for the import process.
+  * @returns A promise that resolves to the imported GTFS SQL tables.
+  */
+ export declare function importGtfsToDatabase(plan: Plan, config?: ImportGtfsToDatabaseConfig): Promise<GtfsSQLTables>;
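For orientation, the package's public surface is just this one function. A minimal usage sketch, not part of the published package — the signature and config fields come from the declarations in this diff, while how the `plan` value is obtained and the date literals are assumptions:

```js
import { importGtfsToDatabase } from '@tmlmobilidade/import-gtfs';

// Hypothetical: a Plan document obtained from wherever plans are stored.
const plan = await getPlanSomehow();

// start_date / end_date are optional and narrow the imported service window;
// they default to the plan's feed_start_date / feed_end_date (see dist/index.js).
// The literals assume OperationalDate has a YYYYMMDD shape.
const tables = await importGtfsToDatabase(plan, {
    end_date: '20250331',
    start_date: '20250201',
});

console.log(tables.calendar_dates.size); // Map of service_id -> OperationalDate[]
```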
package/dist/index.js ADDED
@@ -0,0 +1,65 @@
+ /* * */
+ import { processCalendarFile } from './src/processors/calendar.js';
+ import { processCalendarDatesFile } from './src/processors/calendar_dates.js';
+ import { processRoutesFile } from './src/processors/routes.js';
+ import { processShapesFile } from './src/processors/shapes.js';
+ import { processStopTimesFile } from './src/processors/stop_times.js';
+ import { processStopsFile } from './src/processors/stops.js';
+ import { processTripsFile } from './src/processors/trips.js';
+ import { downloadAndExtractGtfs } from './src/utils/extract-file.js';
+ import { initGtfsSqlTables } from './src/utils/init-tables.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Imports GTFS data into the database for a given plan.
+  * @param plan The plan containing GTFS feed information.
+  * @param config Optional configuration for the import process.
+  * @returns A promise that resolves to the imported GTFS SQL tables.
+  */
+ export async function importGtfsToDatabase(plan, config = {}) {
+     try {
+         //
+         const globalTimer = new TIMETRACKER();
+         Logs.info(`Importing ${plan._id} GTFS to database...`);
+         //
+         // Initialize context for the current plan
+         const context = {
+             counters: {
+                 calendar_dates: 0,
+                 hashed_shapes: 0,
+                 hashed_trips: 0,
+                 shapes: 0,
+                 stop_times: 0,
+                 trips: 0,
+             },
+             gtfs: initGtfsSqlTables(),
+             plan: plan,
+             referenced_route_ids: new Set(),
+             referenced_shape_ids: new Set(),
+             workdir: await downloadAndExtractGtfs(plan),
+         };
+         //
+         // Validate GTFS feed info
+         if (!plan.gtfs_feed_info?.feed_start_date || !plan.gtfs_feed_info?.feed_end_date) {
+             throw new Error(`Plan "${plan._id}" is missing GTFS feed start and/or end date.`);
+         }
+         //
+         // Process GTFS files in dependency order: calendars, then trips, then the entities trips reference (routes, shapes, stops), and finally stop_times
+         await processCalendarFile(context, config.start_date ?? plan.gtfs_feed_info.feed_start_date, config.end_date ?? plan.gtfs_feed_info.feed_end_date);
+         await processCalendarDatesFile(context, config.start_date ?? plan.gtfs_feed_info.feed_start_date, config.end_date ?? plan.gtfs_feed_info.feed_end_date);
+         await processTripsFile(context);
+         await processRoutesFile(context);
+         await processShapesFile(context);
+         await processStopsFile(context);
+         await processStopTimesFile(context);
+         Logs.success(`Finished importing GTFS to database for plan "${plan._id}" in ${globalTimer.get()}.`, 0);
+         Logs.divider();
+         Logs.terminate(`Finished importing GTFS to database in ${globalTimer.get()}.`);
+         return context.gtfs;
+         //
+     }
+     catch (error) {
+         Logs.error('Error parsing plan.', error);
+         throw error;
+     }
+ }
package/dist/src/processors/calendar.d.ts ADDED
@@ -0,0 +1,11 @@
+ import { type ImportGtfsContext } from '../types.js';
+ import { OperationalDate } from '@tmlmobilidade/types';
+ /**
+  * Processes the calendar.txt file from the GTFS dataset.
+  * It extracts service_ids that are valid between the given start_date and end_date,
+  * and populates the context's calendar_dates map with operational dates for each service_id.
+  * @param context The import GTFS context to populate with calendar dates.
+  * @param startDate The start date of the range to filter service_ids.
+  * @param endDate The end date of the range to filter service_ids.
+  */
+ export declare function processCalendarFile(context: ImportGtfsContext, startDate: OperationalDate, endDate: OperationalDate): Promise<void>;
package/dist/src/processors/calendar.js ADDED
@@ -0,0 +1,80 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsCalendar } from '@tmlmobilidade/types';
+ import { Dates, getOperationalDatesFromRange, Logs } from '@tmlmobilidade/utils';
+ import fs from 'node:fs';
+ /**
+  * Processes the calendar.txt file from the GTFS dataset.
+  * It extracts service_ids that are valid between the given start_date and end_date,
+  * and populates the context's calendar_dates map with operational dates for each service_id.
+  * @param context The import GTFS context to populate with calendar dates.
+  * @param startDate The start date of the range to filter service_ids.
+  * @param endDate The end date of the range to filter service_ids.
+  */
+ export async function processCalendarFile(context, startDate, endDate) {
+     try {
+         //
+         const calendarParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "calendar.txt"...`);
+         const parseEachRow = async (data) => {
+             //
+             //
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsCalendar(data);
+             //
+             // Check if this service_id is between the given start_date and end_date.
+             // Clip the service_id's start and end dates to the given start and end dates.
+             let serviceIdStartDate = validatedData.start_date;
+             let serviceIdEndDate = validatedData.end_date;
+             if (serviceIdEndDate < startDate || serviceIdStartDate > endDate)
+                 return;
+             if (serviceIdStartDate < startDate)
+                 serviceIdStartDate = startDate;
+             if (serviceIdEndDate > endDate)
+                 serviceIdEndDate = endDate;
+             //
+             // If we're here, it means the service_id is valid between the given dates.
+             // For the configured weekly schedule, create the individual operational dates
+             // for each day of the week that is active.
+             const allOperationalDatesInRange = getOperationalDatesFromRange(serviceIdStartDate, serviceIdEndDate);
+             const validOperationalDates = new Set();
+             for (const currentDate of allOperationalDatesInRange) {
+                 const dayOfWeek = Dates.fromOperationalDate(currentDate, 'Europe/Lisbon').toFormat('c');
+                 if (dayOfWeek === '1' && validatedData.monday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '2' && validatedData.tuesday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '3' && validatedData.wednesday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '4' && validatedData.thursday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '5' && validatedData.friday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '6' && validatedData.saturday === 1)
+                     validOperationalDates.add(currentDate);
+                 if (dayOfWeek === '7' && validatedData.sunday === 1)
+                     validOperationalDates.add(currentDate);
+             }
+             //
+             // Save the valid operational dates for this service_id
+             context.gtfs.calendar_dates.set(validatedData.service_id, Array.from(validOperationalDates));
+             context.counters.calendar_dates += validOperationalDates.size;
+             //
+         };
+         //
+         // Setup the CSV parsing operation only if the file exists
+         if (fs.existsSync(`${context.workdir.extract_dir_path}/calendar.txt`)) {
+             await parseCsvFile(`${context.workdir.extract_dir_path}/calendar.txt`, parseEachRow);
+             Logs.success(`Finished processing "calendar.txt": ${context.gtfs.calendar_dates.size} rows saved in ${calendarParseTimer.get()}.`, 1);
+         }
+         else {
+             Logs.info(`Optional file "calendar.txt" not found. This may or may not be an error. Proceeding...`, 1);
+         }
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "calendar.txt" file.', error);
+         throw new Error('✖︎ Error processing "calendar.txt" file.');
+     }
+ }
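The clipping step above is compact, so here is a stand-alone reduction of just that logic — a hypothetical helper, not part of the package, assuming OperationalDate values order correctly under `<`/`>` (e.g. YYYYMMDD strings), which the comparisons above already rely on:

```js
// Hypothetical reduction of the range handling in processCalendarFile:
// a service window entirely outside [startDate, endDate] is skipped,
// and a partially overlapping one is clipped to the overlap.
function clipServiceRange(svcStart, svcEnd, startDate, endDate) {
    if (svcEnd < startDate || svcStart > endDate) return null; // no overlap: row is skipped
    return [
        svcStart < startDate ? startDate : svcStart, // clip the left edge
        svcEnd > endDate ? endDate : svcEnd, // clip the right edge
    ];
}

clipServiceRange('20250110', '20250420', '20250201', '20250331'); // ['20250201', '20250331']
clipServiceRange('20240101', '20240131', '20250201', '20250331'); // null
```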
package/dist/src/processors/calendar_dates.d.ts ADDED
@@ -0,0 +1,11 @@
+ import { type ImportGtfsContext } from '../types.js';
+ import { OperationalDate } from '@tmlmobilidade/types';
+ /**
+  * Processes the calendar_dates.txt file from the GTFS dataset.
+  * It extracts service_ids that are valid between the given start_date and end_date,
+  * and updates the context's calendar_dates map with the new dates.
+  * @param context The import GTFS context to populate with calendar dates.
+  * @param startDate The start date of the range to filter service_ids.
+  * @param endDate The end date of the range to filter service_ids.
+  */
+ export declare function processCalendarDatesFile(context: ImportGtfsContext, startDate: OperationalDate, endDate: OperationalDate): Promise<void>;
package/dist/src/processors/calendar_dates.js ADDED
@@ -0,0 +1,74 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsCalendarDate } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ import fs from 'node:fs';
+ /**
+  * Processes the calendar_dates.txt file from the GTFS dataset.
+  * It extracts service_ids that are valid between the given start_date and end_date,
+  * and updates the context's calendar_dates map with the new dates.
+  * @param context The import GTFS context to populate with calendar dates.
+  * @param startDate The start date of the range to filter service_ids.
+  * @param endDate The end date of the range to filter service_ids.
+  */
+ export async function processCalendarDatesFile(context, startDate, endDate) {
+     try {
+         //
+         const calendarDatesParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "calendar_dates.txt"...`);
+         const parseEachRow = async (data) => {
+             //
+             //
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsCalendarDate(data);
+             //
+             // Skip if this row's date is not between the given start and end dates
+             if (validatedData.date < startDate || validatedData.date > endDate)
+                 return;
+             //
+             // If we're here, it means the service_id is valid between the given dates.
+             // Get the previously saved calendar, if one exists for this service_id.
+             const savedCalendar = context.gtfs.calendar_dates.get(validatedData.service_id);
+             if (savedCalendar) {
+                 // Create a new Set to avoid duplicated dates
+                 const updatedCalendar = new Set(savedCalendar);
+                 // If this service_id was previously saved, either add or remove the current
+                 // date based on the exception_type value for this row.
+                 if (validatedData.exception_type === 1) {
+                     updatedCalendar.add(validatedData.date);
+                     context.counters.calendar_dates++;
+                 }
+                 else if (validatedData.exception_type === 2) {
+                     updatedCalendar.delete(validatedData.date);
+                     context.counters.calendar_dates--;
+                 }
+                 // Update the service_id with the new dates
+                 context.gtfs.calendar_dates.set(validatedData.service_id, Array.from(updatedCalendar));
+             }
+             else {
+                 // If this is the first time we're seeing this service_id, then it is only necessary
+                 // to initialize a new dates array if it is a service addition
+                 if (validatedData.exception_type === 1) {
+                     context.gtfs.calendar_dates.set(validatedData.service_id, [validatedData.date]);
+                     context.counters.calendar_dates++;
+                 }
+             }
+             //
+         };
+         //
+         // Setup the CSV parsing operation only if the file exists
+         if (fs.existsSync(`${context.workdir.extract_dir_path}/calendar_dates.txt`)) {
+             await parseCsvFile(`${context.workdir.extract_dir_path}/calendar_dates.txt`, parseEachRow);
+             Logs.success(`Finished processing "calendar_dates.txt": ${context.gtfs.calendar_dates.size} rows saved in ${calendarDatesParseTimer.get()}.`, 1);
+         }
+         else {
+             Logs.info(`Optional file "calendar_dates.txt" not found. This may or may not be an error. Proceeding...`, 1);
+         }
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "calendar_dates.txt" file.', error);
+         throw new Error('✖︎ Error processing "calendar_dates.txt" file.');
+     }
+ }
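The exception_type branch above follows the GTFS spec: type 1 adds a service date, type 2 removes one. A hypothetical stand-alone reduction mirroring the code above (not an API of this package):

```js
// exception_type 1 adds the date to the service, 2 removes it.
function applyCalendarDateException(dates, row) {
    const updated = new Set(dates); // a Set avoids duplicated dates
    if (row.exception_type === 1) updated.add(row.date);
    else if (row.exception_type === 2) updated.delete(row.date);
    return Array.from(updated);
}

applyCalendarDateException(['20250101'], { date: '20250102', exception_type: 1 }); // ['20250101', '20250102']
applyCalendarDateException(['20250101'], { date: '20250101', exception_type: 2 }); // []
```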
package/dist/src/processors/routes.d.ts ADDED
@@ -0,0 +1,7 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the routes.txt file from the GTFS dataset.
+  * It filters routes based on the previously saved trips.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processRoutesFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/routes.js ADDED
@@ -0,0 +1,37 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsRouteExtended } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the routes.txt file from the GTFS dataset.
+  * It filters routes based on the previously saved trips.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processRoutesFile(context) {
+     try {
+         //
+         const routesParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "routes.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsRouteExtended(data);
+             // For each route, only save the ones referenced
+             // by the previously saved trips.
+             if (!context.referenced_route_ids.has(validatedData.route_id))
+                 return;
+             // Save the exported row
+             context.gtfs.routes.write(validatedData);
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/routes.txt`, parseEachRow);
+         context.gtfs.routes.flush();
+         Logs.success(`Finished processing "routes.txt": ${context.gtfs.routes.size} rows saved in ${routesParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "routes.txt" file.', error);
+         throw new Error('✖︎ Error processing "routes.txt" file.');
+     }
+ }
package/dist/src/processors/shapes.d.ts ADDED
@@ -0,0 +1,7 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the shapes.txt file from the GTFS dataset.
+  * Includes only the shapes referenced by the previously saved trips.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processShapesFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/shapes.js ADDED
@@ -0,0 +1,42 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsShape } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the shapes.txt file from the GTFS dataset.
+  * Includes only the shapes referenced by the previously saved trips.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processShapesFile(context) {
+     try {
+         //
+         const shapesParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "shapes.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsShape(data);
+             // For each shape, only save the ones referenced
+             // by the previously saved trips.
+             if (!context.referenced_shape_ids.has(validatedData.shape_id))
+                 return;
+             // Save the exported row
+             context.gtfs.shapes.write(validatedData);
+             // Log progress
+             if (context.counters.shapes % 100000 === 0)
+                 Logs.info(`Parsed ${context.counters.shapes} shapes.txt rows so far.`);
+             // Increment the counter
+             context.counters.shapes++;
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/shapes.txt`, parseEachRow);
+         context.gtfs.shapes.flush();
+         Logs.success(`Finished processing "shapes.txt": ${context.gtfs.shapes.size} rows saved in ${shapesParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "shapes.txt" file.', error);
+         throw new Error('✖︎ Error processing "shapes.txt" file.');
+     }
+ }
package/dist/src/processors/stop_times.d.ts ADDED
@@ -0,0 +1,9 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the stop_times.txt file from the GTFS dataset.
+  * Only includes the stop_times for trips referenced before.
+  * Since this is the most resource-intensive operation of them all,
+  * the associated stop data is included right away to avoid another lookup later.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processStopTimesFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/stop_times.js ADDED
@@ -0,0 +1,48 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsStopTime } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the stop_times.txt file from the GTFS dataset.
+  * Only includes the stop_times for trips referenced before.
+  * Since this is the most resource-intensive operation of them all,
+  * the associated stop data is included right away to avoid another lookup later.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processStopTimesFile(context) {
+     try {
+         //
+         const stopTimesParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "stop_times.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsStopTime(data);
+             // Skip if this row's trip_id was not saved before.
+             const tripData = context.gtfs.trips.get('trip_id', validatedData.trip_id);
+             if (!tripData)
+                 return;
+             // Also, check if the stop_id is valid and was saved before.
+             const stopData = context.gtfs.stops.get('stop_id', validatedData.stop_id);
+             if (!stopData)
+                 return;
+             // Save the exported row
+             context.gtfs.stop_times.write(validatedData);
+             // Log progress
+             if (context.counters.stop_times % 100000 === 0)
+                 Logs.info(`Parsed ${context.counters.stop_times} stop_times.txt rows so far.`);
+             // Increment the counter
+             context.counters.stop_times++;
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/stop_times.txt`, parseEachRow);
+         context.gtfs.stop_times.flush();
+         Logs.success(`Finished processing "stop_times.txt": ${context.counters.stop_times} rows saved in ${stopTimesParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "stop_times.txt" file.', error);
+         throw new Error('✖︎ Error processing "stop_times.txt" file.');
+     }
+ }
package/dist/src/processors/stops.d.ts ADDED
@@ -0,0 +1,8 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the stops.txt file from the GTFS dataset.
+  * Includes all stops, since there is no way to filter them yet (unlike trips, routes, and shapes).
+  * Saving all of them also speeds up the processing of each stop_time by including the stop data right away.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processStopsFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/stops.js ADDED
@@ -0,0 +1,37 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsStopExtended } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the stops.txt file from the GTFS dataset.
+  * Includes all stops, since there is no way to filter them yet (unlike trips, routes, and shapes).
+  * Saving all of them also speeds up the processing of each stop_time by including the stop data right away.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processStopsFile(context) {
+     try {
+         //
+         const stopsParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "stops.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsStopExtended(data);
+             // Skip if stop already exists
+             if (context.gtfs.stops.get('stop_id', validatedData.stop_id))
+                 return;
+             // Save the exported row
+             context.gtfs.stops.write(validatedData);
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/stops.txt`, parseEachRow);
+         context.gtfs.stops.flush();
+         Logs.success(`Finished processing "stops.txt": ${context.gtfs.stops.size} rows saved in ${stopsParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "stops.txt" file.', error);
+         throw new Error('✖︎ Error processing "stops.txt" file.');
+     }
+ }
package/dist/src/processors/trips.d.ts ADDED
@@ -0,0 +1,7 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Processes the trips.txt file from the GTFS dataset.
+  * It filters trips based on the previously saved calendar dates.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export declare function processTripsFile(context: ImportGtfsContext): Promise<void>;
package/dist/src/processors/trips.js ADDED
@@ -0,0 +1,45 @@
+ /* * */
+ import { parseCsvFile } from '../utils/parse-csv.js';
+ import TIMETRACKER from '@helperkits/timer';
+ import { validateGtfsTripExtended } from '@tmlmobilidade/types';
+ import { Logs } from '@tmlmobilidade/utils';
+ /**
+  * Processes the trips.txt file from the GTFS dataset.
+  * It filters trips based on the previously saved calendar dates.
+  * @param context The import GTFS context containing references to SQL tables and other metadata.
+  */
+ export async function processTripsFile(context) {
+     try {
+         //
+         const tripsParseTimer = new TIMETRACKER();
+         Logs.info(`Reading zip entry "trips.txt"...`);
+         const parseEachRow = async (data) => {
+             // Validate the current row against the proper type
+             const validatedData = validateGtfsTripExtended(data);
+             // For each trip, check if the associated service_id was saved
+             // in the previous step or not. Include it if yes, skip otherwise.
+             if (!context.gtfs.calendar_dates.has(validatedData.service_id))
+                 return;
+             // Save the exported row
+             context.gtfs.trips.write(validatedData);
+             // Reference the associated entities to filter them later.
+             context.referenced_route_ids.add(validatedData.route_id);
+             context.referenced_shape_ids.add(validatedData.shape_id);
+             // Log progress
+             if (context.counters.trips % 10000 === 0)
+                 Logs.info(`Parsed ${context.counters.trips} trips.txt rows so far.`);
+             // Increment the counter
+             context.counters.trips++;
+         };
+         //
+         // Setup the CSV parsing operation
+         await parseCsvFile(`${context.workdir.extract_dir_path}/trips.txt`, parseEachRow);
+         context.gtfs.trips.flush();
+         Logs.success(`Finished processing "trips.txt": ${context.gtfs.trips.size} rows saved in ${tripsParseTimer.get()}.`, 1);
+         //
+     }
+     catch (error) {
+         Logs.error('Error processing "trips.txt" file.', error);
+         throw new Error('✖︎ Error processing "trips.txt" file.');
+     }
+ }
package/dist/src/types.d.ts ADDED
@@ -0,0 +1,47 @@
+ import { SQLiteWriter } from '@tmlmobilidade/connectors';
+ import { type GTFS_Route_Extended, type GTFS_Shape, type GTFS_Stop_Extended, type GTFS_StopTime, type GTFS_Trip_Extended, type Plan } from '@tmlmobilidade/types';
+ import { type OperationalDate } from '@tmlmobilidade/types';
+ /**
+  * Configuration options for importing GTFS data into a database.
+  */
+ export interface ImportGtfsToDatabaseConfig {
+     end_date?: OperationalDate;
+     start_date?: OperationalDate;
+ }
+ /**
+  * Holds references to all GTFS-related SQL tables and writers.
+  * Each property corresponds to a specific GTFS entity and is associated
+  * with a SQLiteWriter instance for that entity.
+  */
+ export interface GtfsSQLTables {
+     calendar_dates: Map<string, OperationalDate[]>;
+     routes: SQLiteWriter<GTFS_Route_Extended>;
+     shapes: SQLiteWriter<GTFS_Shape>;
+     stop_times: SQLiteWriter<GTFS_StopTime>;
+     stops: SQLiteWriter<GTFS_Stop_Extended>;
+     trips: SQLiteWriter<GTFS_Trip_Extended>;
+ }
+ /**
+  * Context object used throughout the GTFS import process.
+  * It contains counters for various entities, references to GTFS SQL tables,
+  * the original plan metadata, sets of referenced IDs, and paths for working directories.
+  */
+ export interface ImportGtfsContext {
+     counters: {
+         calendar_dates: number;
+         hashed_shapes: number;
+         hashed_trips: number;
+         shapes: number;
+         stop_times: number;
+         trips: number;
+     };
+     gtfs: GtfsSQLTables;
+     plan: Plan;
+     referenced_route_ids: Set<string>;
+     referenced_shape_ids: Set<string>;
+     workdir: {
+         download_file_path: string;
+         extract_dir_path: string;
+         path: string;
+     };
+ }
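A sketch of consuming the returned GtfsSQLTables. Only the SQLiteWriter members this package itself exercises — write(), flush(), get(), and size — appear here; the trip id is a hypothetical placeholder:

```js
// `plan` as in the earlier sketch; `tables` matches the GtfsSQLTables interface above.
const tables = await importGtfsToDatabase(plan);

// calendar_dates is a plain Map of service_id -> OperationalDate[].
for (const [serviceId, dates] of tables.calendar_dates) {
    console.log(serviceId, dates.length);
}

// The other tables are SQLiteWriter instances. Elsewhere in this package,
// get(column, value) looks a row up by column value — e.g. in stop_times.js:
const trip = tables.trips.get('trip_id', 'TRIP_XYZ'); // 'TRIP_XYZ' is a placeholder
```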
package/dist/src/types.js ADDED
@@ -0,0 +1,2 @@
+ /* * */
+ export {};
package/dist/src/utils/extract-file.d.ts ADDED
@@ -0,0 +1,8 @@
+ import { type ImportGtfsContext } from '../types.js';
+ import { type Plan } from '@tmlmobilidade/types';
+ /**
+  * Downloads and extracts the GTFS files for the given plan.
+  * @param plan The plan containing the operation_file_id to download and extract.
+  * @returns The working directory containing the extracted GTFS files.
+  */
+ export declare function downloadAndExtractGtfs(plan: Plan): Promise<ImportGtfsContext['workdir']>;
package/dist/src/utils/extract-file.js ADDED
@@ -0,0 +1,63 @@
+ /* * */
+ import { unzipFile } from './unzip-file.js';
+ import { files } from '@tmlmobilidade/interfaces';
+ import { Logs } from '@tmlmobilidade/utils';
+ import fs from 'node:fs';
+ /**
+  * Downloads and extracts the GTFS files for the given plan.
+  * @param plan The plan containing the operation_file_id to download and extract.
+  * @returns The working directory containing the extracted GTFS files.
+  */
+ export async function downloadAndExtractGtfs(plan) {
+     //
+     // Abort early if the plan has no operation file
+     if (!plan.operation_file_id) {
+         Logs.error(`No operation file found for plan "${plan._id}".`);
+         process.exit(1);
+     }
+     //
+     // Prepare the working directory
+     const workdirPath = `/tmp/${plan._id}`;
+     const downloadFilePath = `${workdirPath}/${plan.operation_file_id}.zip`;
+     const extractDirPath = `${workdirPath}/extracted`;
+     try {
+         fs.rmSync(workdirPath, { force: true, recursive: true });
+         fs.mkdirSync(workdirPath, { recursive: true });
+         Logs.success('Prepared working directory.', 1);
+     }
+     catch (error) {
+         Logs.error(`Error preparing workdir path "${workdirPath}".`, error);
+         process.exit(1);
+     }
+     //
+     // Get the associated Operation GTFS archive URL,
+     // and try to download, save and unzip it.
+     const operationFileData = await files.findById(plan.operation_file_id);
+     if (!operationFileData || !operationFileData.url) {
+         Logs.error(`No operation file found for plan "${plan._id}".`);
+         process.exit(1);
+     }
+     try {
+         const downloadResponse = await fetch(operationFileData.url);
+         const downloadArrayBuffer = await downloadResponse.arrayBuffer();
+         fs.writeFileSync(downloadFilePath, Buffer.from(downloadArrayBuffer));
+     }
+     catch (error) {
+         Logs.error('Error downloading the file.', error);
+         process.exit(1);
+     }
+     try {
+         await unzipFile(downloadFilePath, extractDirPath);
+         Logs.success(`Unzipped GTFS file from "${downloadFilePath}" to "${extractDirPath}".`, 1);
+     }
+     catch (error) {
+         Logs.error('Error unzipping the file.', error);
+         process.exit(1);
+     }
+     return {
+         download_file_path: downloadFilePath,
+         extract_dir_path: extractDirPath,
+         path: workdirPath,
+     };
+     //
+ }
@@ -0,0 +1,2 @@
+ import { type OperationalDate, type UnixTimestamp } from '@tmlmobilidade/types';
+ export declare const convertGTFSTimeStringAndOperationalDateToUnixTimestamp: (timeString: string, operationalDate: OperationalDate) => UnixTimestamp;
@@ -0,0 +1,19 @@
+ /* * */
+ import { Dates } from '@tmlmobilidade/utils';
+ /* * */
+ export const convertGTFSTimeStringAndOperationalDateToUnixTimestamp = (timeString, operationalDate) => {
+     //
+     // Throw early if no time string or operational date is provided
+     if (!timeString || !operationalDate)
+         throw new Error(`✖︎ No time string or operational date provided. timeString: ${timeString}, operationalDate: ${operationalDate}`);
+     // Check if the timestring is in the format HH:MM:SS
+     if (!/^\d{2}:\d{2}:\d{2}$/.test(timeString))
+         throw new Error(`✖︎ Invalid time string format. timeString: ${timeString}`);
+     // Extract the individual components of the time string (HH:MM:SS)
+     const [hoursOperation, minutesOperation, secondsOperation] = timeString.split(':').map(Number);
+     return Dates
+         .fromOperationalDate(operationalDate, 'Europe/Lisbon')
+         .set({ hour: hoursOperation, minute: minutesOperation, second: secondsOperation })
+         .unix_timestamp;
+     //
+ };
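One caveat worth noting: the GTFS spec allows stop times of 24:00:00 and beyond for trips running past midnight of the service day, and the HH:MM:SS regex above accepts such values (e.g. "25:30:00"); how they resolve depends on the Dates helper from @tmlmobilidade/utils, which this diff does not include. A hedged usage sketch:

```js
// The date literal assumes a YYYYMMDD OperationalDate; behavior for
// hours >= 24 depends on the (unshown) Dates helper and is not asserted here.
const ts = convertGTFSTimeStringAndOperationalDateToUnixTimestamp('08:30:00', '20250301');

// Malformed strings throw before any date math happens:
// convertGTFSTimeStringAndOperationalDateToUnixTimestamp('8:30', '20250301');
// -> Error: ✖︎ Invalid time string format. timeString: 8:30
```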
package/dist/src/utils/init-tables.d.ts ADDED
@@ -0,0 +1,6 @@
+ import { type ImportGtfsContext } from '../types.js';
+ /**
+  * Initializes GTFS SQL tables and writers.
+  * @returns An object containing initialized GTFS SQL tables and writers.
+  */
+ export declare function initGtfsSqlTables(): ImportGtfsContext['gtfs'];
package/dist/src/utils/init-tables.js ADDED
@@ -0,0 +1,120 @@
+ /* * */
+ import { SQLiteWriter } from '@tmlmobilidade/connectors';
+ /**
+  * Initializes GTFS SQL tables and writers.
+  * @returns An object containing initialized GTFS SQL tables and writers.
+  */
+ export function initGtfsSqlTables() {
+     //
+     const calendarDatesMap = new Map();
+     const tripsWriter = new SQLiteWriter({
+         batch_size: 10000,
+         columns: [
+             { indexed: true, name: 'trip_id', not_null: true, primary_key: true, type: 'TEXT' },
+             { indexed: false, name: 'bikes_allowed', type: 'INTEGER' },
+             { indexed: false, name: 'block_id', type: 'TEXT' },
+             { indexed: false, name: 'direction_id', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'route_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'service_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'shape_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'trip_headsign', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'trip_short_name', type: 'TEXT' },
+             { indexed: false, name: 'wheelchair_accessible', type: 'INTEGER' },
+             { indexed: false, name: 'pattern_id', not_null: true, type: 'TEXT' },
+         ],
+     });
+     const routesWriter = new SQLiteWriter({
+         batch_size: 10000,
+         columns: [
+             { indexed: false, name: 'agency_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'continuous_drop_off', type: 'INTEGER' },
+             { indexed: false, name: 'continuous_pickup', type: 'INTEGER' },
+             { indexed: false, name: 'route_color', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'route_desc', type: 'TEXT' },
+             { indexed: true, name: 'route_id', not_null: true, primary_key: true, type: 'TEXT' },
+             { indexed: false, name: 'route_long_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'route_short_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'route_sort_order', type: 'INTEGER' },
+             { indexed: false, name: 'route_text_color', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'route_type', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'route_url', type: 'TEXT' },
+             { indexed: false, name: 'circular', type: 'INTEGER' },
+             { indexed: false, name: 'line_id', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'line_long_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'line_short_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'path_type', type: 'INTEGER' },
+             { indexed: false, name: 'route_remarks', type: 'TEXT' },
+             { indexed: false, name: 'school', type: 'INTEGER' },
+         ],
+     });
+     const shapesWriter = new SQLiteWriter({
+         batch_size: 100000,
+         columns: [
+             { indexed: true, name: 'shape_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'shape_pt_lat', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'shape_pt_lon', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'shape_pt_sequence', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'shape_dist_traveled', not_null: true, type: 'REAL' },
+         ],
+     });
+     const stopsWriter = new SQLiteWriter({
+         batch_size: 10000,
+         columns: [
+             { indexed: false, name: 'level_id', type: 'TEXT' },
+             { indexed: false, name: 'location_type', type: 'INTEGER' },
+             { indexed: false, name: 'parent_station', type: 'TEXT' },
+             { indexed: false, name: 'platform_code', type: 'TEXT' },
+             { indexed: false, name: 'stop_code', type: 'TEXT' },
+             { indexed: false, name: 'stop_desc', type: 'TEXT' },
+             { indexed: true, name: 'stop_id', not_null: true, primary_key: true, type: 'TEXT' },
+             { indexed: false, name: 'stop_lat', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'stop_lon', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'stop_name', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'stop_timezone', type: 'TEXT' },
+             { indexed: false, name: 'stop_url', type: 'TEXT' },
+             { indexed: false, name: 'wheelchair_boarding', type: 'INTEGER' },
+             { indexed: false, name: 'zone_id', type: 'TEXT' },
+             { indexed: false, name: 'has_bench', type: 'INTEGER' },
+             { indexed: false, name: 'has_network_map', type: 'INTEGER' },
+             { indexed: false, name: 'has_pip_real_time', type: 'INTEGER' },
+             { indexed: false, name: 'has_schedules', type: 'INTEGER' },
+             { indexed: false, name: 'has_shelter', type: 'INTEGER' },
+             { indexed: false, name: 'has_stop_sign', type: 'INTEGER' },
+             { indexed: false, name: 'has_tariffs_information', type: 'INTEGER' },
+             { indexed: false, name: 'municipality_id', type: 'TEXT' },
+             { indexed: false, name: 'parish_id', type: 'TEXT' },
+             { indexed: false, name: 'public_visible', type: 'INTEGER' },
+             { indexed: false, name: 'region_id', type: 'TEXT' },
+             { indexed: false, name: 'shelter_code', type: 'TEXT' },
+             { indexed: false, name: 'shelter_maintainer', type: 'TEXT' },
+             { indexed: false, name: 'stop_short_name', type: 'TEXT' },
+             { indexed: false, name: 'tts_stop_name', type: 'TEXT' },
+         ],
+     });
+     const stopTimesWriter = new SQLiteWriter({
+         batch_size: 100000,
+         columns: [
+             { indexed: false, name: 'arrival_time', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'continuous_drop_off', type: 'INTEGER' },
+             { indexed: false, name: 'continuous_pickup', type: 'INTEGER' },
+             { indexed: false, name: 'departure_time', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'drop_off_type', type: 'INTEGER' },
+             { indexed: false, name: 'pickup_type', type: 'INTEGER' },
+             { indexed: false, name: 'shape_dist_traveled', not_null: true, type: 'REAL' },
+             { indexed: false, name: 'stop_headsign', type: 'TEXT' },
+             { indexed: true, name: 'stop_id', not_null: true, type: 'TEXT' },
+             { indexed: true, name: 'trip_id', not_null: true, type: 'TEXT' },
+             { indexed: false, name: 'stop_sequence', not_null: true, type: 'INTEGER' },
+             { indexed: false, name: 'timepoint', type: 'INTEGER' },
+         ],
+     });
+     return {
+         calendar_dates: calendarDatesMap,
+         routes: routesWriter,
+         shapes: shapesWriter,
+         stop_times: stopTimesWriter,
+         stops: stopsWriter,
+         trips: tripsWriter,
+     };
+     //
+ }
package/dist/src/utils/parse-csv.d.ts ADDED
@@ -0,0 +1 @@
+ export declare function parseCsvFile(filePath: string, rowParser: (rowData: any) => Promise<void>): Promise<void>;
package/dist/src/utils/parse-csv.js ADDED
@@ -0,0 +1,13 @@
+ /* * */
+ import { parse as csvParser } from 'csv-parse';
+ import fs from 'fs';
+ /* * */
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ export async function parseCsvFile(filePath, rowParser) {
+     const parser = csvParser({ bom: true, columns: true, record_delimiter: ['\n', '\r', '\r\n'], skip_empty_lines: true, trim: true });
+     const fileStream = fs.createReadStream(filePath);
+     const stream = fileStream.pipe(parser);
+     for await (const rowData of stream) {
+         await rowParser(rowData);
+     }
+ }
package/dist/src/utils/unzip-file.d.ts ADDED
@@ -0,0 +1,2 @@
+ export declare const unzipFile: (zipFilePath: any, outputDir: any) => Promise<void>;
+ export declare const setDirectoryPermissions: (dirPath: any, mode?: number) => void;
package/dist/src/utils/unzip-file.js ADDED
@@ -0,0 +1,21 @@
+ /* * */
+ import extract from 'extract-zip';
+ import fs from 'fs';
+ /* * */
+ export const unzipFile = async (zipFilePath, outputDir) => {
+     await extract(zipFilePath, { dir: outputDir });
+     setDirectoryPermissions(outputDir);
+ };
+ /* * */
+ export const setDirectoryPermissions = (dirPath, mode = 0o666) => {
+     const files = fs.readdirSync(dirPath, { withFileTypes: true });
+     for (const file of files) {
+         const filePath = `${dirPath}/${file.name}`;
+         if (file.isDirectory()) {
+             setDirectoryPermissions(filePath, mode);
+         }
+         else {
+             fs.chmodSync(filePath, mode);
+         }
+     }
+ };
package/package.json ADDED
@@ -0,0 +1,54 @@
+ {
+   "name": "@tmlmobilidade/import-gtfs",
+   "version": "20251009.1357.48",
+   "author": "João de Vasconcelos & Jusi Monteiro",
+   "license": "AGPL-3.0-or-later",
+   "homepage": "https://github.com/tmlmobilidade/services#readme",
+   "bugs": {
+     "url": "https://github.com/tmlmobilidade/services/issues"
+   },
+   "repository": {
+     "type": "git",
+     "url": "git+https://github.com/tmlmobilidade/services.git"
+   },
+   "keywords": [
+     "public transit",
+     "tml",
+     "transportes metropolitanos de lisboa",
+     "services"
+   ],
+   "publishConfig": {
+     "access": "public"
+   },
+   "type": "module",
+   "files": [
+     "dist/"
+   ],
+   "exports": {
+     ".": {
+       "types": "./dist/index.d.ts",
+       "default": "./dist/index.js"
+     }
+   },
+   "scripts": {
+     "build": "rimraf ./dist && tsc && resolve-tspaths",
+     "dev": "email dev -p 3001 --dir ./src/emails",
+     "export": "email export --dir ./src/emails",
+     "lint": "eslint ./src/ && tsc --noEmit"
+   },
+   "dependencies": {
+     "@tmlmobilidade/connectors": "*",
+     "@tmlmobilidade/interfaces": "*",
+     "@tmlmobilidade/utils": "*"
+   },
+   "devDependencies": {
+     "@carrismetropolitana/eslint": "20250622.1204.50",
+     "@tmlmobilidade/lib": "*",
+     "@tmlmobilidade/tsconfig": "*",
+     "@tmlmobilidade/types": "*",
+     "@types/node": "24.7.0",
+     "resolve-tspaths": "0.8.23",
+     "rimraf": "6.0.1",
+     "typescript": "5.9.3"
+   }
+ }