@transportme/vline-nsp-reader 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
// Locations on the V/Line corporate website used by this package.
const constants = {
  // Origin that the page path and the scraped file links are appended to
  VLINE_CORPORATE_HOST: 'https://corporate.vline.com.au',
  // Path of the page whose publication list is scraped for NSP files
  NSP_PAGE: '/Network-Access/Network-service-plan'
}

export default constants
@@ -0,0 +1,153 @@
1
+ import { getOperationDays } from './nsp-utils.mjs'
2
+ import TableReader from './table-reader.mjs'
3
+
4
/**
 * Turns the tables extracted from an NSP timetable PDF into train runs.
 *
 * Every page is a 2D array of cell strings. Column 0 holds the row label,
 * column 1 is read as the row type for body rows ('Plat', 'Arr', ...), and
 * each column from index 2 onwards describes one run.
 *
 * Rows up to and including the 'Train Movement Type' row form the header.
 * The row directly after it is skipped and everything below is the body.
 * The last body row is read as the run's "forming" value rather than as a
 * station.
 */
export default class NSPPDFReader {

  #filePath
  #pages

  // Per-page cache of the forward-filled station column (see getStations)
  #stations = []

  /**
   * @param {string} filePath - Path of the PDF; passed to TableReader by read()
   */
  constructor(filePath) {
    this.#filePath = filePath
  }

  /**
   * @param {number} pageNum - Index into the loaded pages
   * @returns {number} Index of the 'Train Movement Type' row, or -1 if the page has none
   */
  getHeaderIndex(pageNum) {
    return this.#pages[pageNum].findIndex(row => row[0] === 'Train Movement Type')
  }

  /**
   * @param {number} pageNum - Index into the loaded pages
   * @returns {string[][]} All rows up to and including the 'Train Movement Type' row
   */
  getHeader(pageNum) {
    const headerIndex = this.getHeaderIndex(pageNum)
    return this.#pages[pageNum].slice(0, headerIndex + 1)
  }

  /**
   * @param {number} pageNum - Index into the loaded pages
   * @returns {string[][]} The rows below the header, excluding the single row
   *   that directly follows 'Train Movement Type'
   */
  getBody(pageNum) {
    const headerIndex = this.getHeaderIndex(pageNum)
    return this.#pages[pageNum].slice(headerIndex + 2)
  }

  /**
   * Returns the first column of the body with every blank cell replaced by the
   * closest non-blank value above it, so each body row knows which station it
   * belongs to. The result is cached per page.
   *
   * @param {number} pageNum - Index into the loaded pages
   * @returns {string[]} One station name per body row
   */
  getStations(pageNum) {
    if (this.#stations[pageNum]) return this.#stations[pageNum]

    const stationList = this.getBody(pageNum).map(row => row[0])
    const output = []
    let lastStation = stationList[0]
    for (const name of stationList) {
      if (name) lastStation = name
      output.push(lastStation)
    }

    this.#stations[pageNum] = output
    return output
  }

  /**
   * Loads the PDF through TableReader and stores its tables as pages.
   *
   * A raw page that has 'Business ID' in the first column of one of its first
   * four rows starts a new page. Any other raw page is appended to the previous
   * page, padded on the right with blank cells when it is narrower.
   *
   * Raw pages without rows, and continuation pages that appear before any
   * 'Business ID' page, are skipped; there is nothing to append them to.
   *
   * @returns {Promise<void>}
   */
  async read() {
    const tableReader = new TableReader(this.#filePath)
    const rawPages = await tableReader.read()

    const pages = []
    for (const page of rawPages) {
      if (page.length === 0) continue

      const startsNewTable = page.slice(0, 4).some(row => row[0] === 'Business ID')
      if (startsNewTable) {
        pages.push(page)
        continue
      }

      const previousPage = pages[pages.length - 1]
      if (!previousPage) continue

      const colDiff = previousPage[0].length - page[0].length
      if (colDiff > 0) {
        for (const row of page) row.push(...Array(colDiff).fill(''))
      }
      previousPage.push(...page)
    }

    this.#pages = pages
  }

  /**
   * Replaces the loaded pages directly, bypassing read().
   *
   * @param {string[][][]} data - Pages in the same shape read() produces
   */
  __setPageData(data) { this.#pages = data }

  /**
   * Normalises the raw time cell of a station in place.
   *
   * - A trailing '*' is removed from depTime and recorded as express = true.
   * - When no arrival time was set, 'A/B' in depTime is split into
   *   arrTime = A and depTime = B; otherwise arrTime is copied from depTime.
   *
   * @param {object} station - Station record as built by getRuns
   * @returns {object} The same station object
   */
  processStation(station) {
    if (station.depTime.endsWith('*')) {
      station.express = true
      station.depTime = station.depTime.slice(0, -1)
    }

    if (station.depTime && !station.arrTime) {
      if (station.depTime.includes('/')) {
        const [arrTime, depTime] = station.depTime.split('/')
        station.arrTime = arrTime
        station.depTime = depTime
      } else {
        station.arrTime = station.depTime
      }
    }

    return station
  }

  /**
   * Collects the station records of one run column.
   *
   * The first non-blank cell seen for a station becomes its depTime. Further
   * non-blank cells for the same station are stored by the row type found in
   * column 1: 'Plat' -> plat, 'Arr' -> arrTime, anything else -> track.
   * The last body row is not visited (it is the "forming" row).
   */
  #collectStations(body, stations, runIndex) {
    const collected = []
    let current = null
    let currentName = null

    for (let rowIndex = 0; rowIndex < stations.length - 1; rowIndex++) {
      const cell = body[rowIndex][runIndex]
      if (!cell) continue

      const rowStation = stations[rowIndex]
      if (currentName === rowStation) {
        const rowType = body[rowIndex][1]
        if (rowType === 'Plat') current.plat = cell
        else if (rowType === 'Arr') current.arrTime = cell
        else current.track = cell
      } else {
        if (current) collected.push(current)
        current = {
          name: rowStation,
          arrTime: null,
          depTime: cell,
          plat: null, track: null, express: false
        }
      }

      currentName = rowStation
    }

    // A column without any station cells leaves `current` unset; pushing it
    // would hand null to processStation and throw.
    if (current) collected.push(current)

    return collected.map(station => this.processStation(station))
  }

  /**
   * Builds one run object per run column (index 2 and up) of a page.
   *
   * Header row 0 supplies the TDN (first four characters; anything longer
   * marks the run as conditional) and header row 1 the days-run code. The
   * remaining header rows are matched by their label in column 0.
   *
   * @param {number} pageNum - Index into the loaded pages
   * @returns {object[]} Runs with tdn, conditional, daysRunCode, daysRun,
   *   operator, movementType, formedBy, forming, vehicleType and stations
   */
  getRuns(pageNum) {
    const header = this.getHeader(pageNum)
    const body = this.getBody(pageNum)
    const stations = this.getStations(pageNum)

    const runs = []
    for (let runIndex = 2; runIndex < header[0].length; runIndex++) {
      const runData = {
        tdn: header[0][runIndex].slice(0, 4),
        conditional: header[0][runIndex].length > 4,
        daysRunCode: header[1][runIndex],
        daysRun: getOperationDays(header[1][runIndex]),
        operator: null, movementType: null,
        formedBy: null, forming: null,
        vehicleType: null, stations: []
      }

      for (let headerRow = 2; headerRow < header.length; headerRow++) {
        const rowName = header[headerRow][0]
        const rowData = header[headerRow][runIndex]
        if (rowName === 'Operator') runData.operator = rowData
        else if (rowName === 'Train Movement Type') runData.movementType = rowData
        else if (rowName === 'Master Vehicle Formation') runData.vehicleType = rowData
        else if (rowName === 'Formed By On Arrival') runData.formedBy = rowData
      }

      runData.stations = this.#collectStations(body, stations, runIndex)

      // The last body row holds the forming value; tolerate an empty body or a missing cell
      const forming = (body[stations.length - 1]?.[runIndex] ?? '').toUpperCase()
      runData.forming = forming.length ? forming : null

      runs.push(runData)
    }

    return runs
  }

  /**
   * @returns {object[]} The runs of every loaded page, in page order
   */
  getAllRuns() {
    const runs = []
    for (let i = 0; i < this.#pages.length; i++) runs.push(...this.getRuns(i))
    return runs
  }

}
@@ -0,0 +1,33 @@
1
// Day names in week order; the first five are the weekdays
const daysOfWeek = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

// Day letters as they appear inside a days-run code -> day name
const dayMapping = {
  'M': 'Mon',
  'Tu': 'Tue',
  'W': 'Wed',
  'Th': 'Thu',
  'F': 'Fri',
  'Sat': 'Sat',
  'Sun': 'Sun',
  'Su': 'Sun'
}

/**
 * Splits a run of day letters into its tokens: each token is one uppercase
 * letter followed by any lowercase letters (e.g. 'TuTh' -> ['Tu', 'Th']).
 *
 * @param {string} letters - Concatenated day letters
 * @returns {string[]|null} The tokens, or null when there are none
 */
export function getOperationDayLetter(letters) {
  return letters.match(/([A-Z][a-z]*)/g)
}

// Maps a run of day letters to day names, dropping letters that are not in dayMapping
function mapDayLetters(letters) {
  return (getOperationDayLetter(letters) ?? [])
    .map(code => dayMapping[code])
    .filter(day => day !== undefined)
}

/**
 * Expands a days-run code into the list of day names it covers.
 *
 * - 'A+B'      : the days of A followed by the days of B
 * - '<days>O'  : only the listed days (e.g. 'MO' -> ['Mon'])
 * - '<days>E'  : Monday to Friday except the listed days
 * - 'MF'       : Monday to Friday
 * - 'Daily'    : all seven days
 * - a day name : that single day
 *
 * Anything else, including a non-string value, yields an empty list.
 *
 * @param {string} days - Days-run code
 * @returns {string[]} Day names
 */
export function getOperationDays(days) {
  if (typeof days !== 'string') return []

  const parts = days.split('+').map(part => part.trim())
  if (parts.length > 1) return parts.flatMap(part => getOperationDays(part))

  const code = parts[0]
  const weekdays = daysOfWeek.slice(0, -2)

  if (code.endsWith('O')) return mapDayLetters(code.slice(0, -1))
  if (code.endsWith('E')) {
    const excludedDays = mapDayLetters(code.slice(0, -1))
    return weekdays.filter(day => !excludedDays.includes(day))
  }
  if (code === 'MF') return weekdays
  if (code === 'Daily') return daysOfWeek.slice(0)
  if (daysOfWeek.includes(code)) return [code]
  return []
}
@@ -0,0 +1,98 @@
1
+ import PDFParser from 'pdf2json'
2
+ import fs from 'fs/promises'
3
+
4
/**
 * Extracts the tables of a PDF as 2D arrays of cell strings, one per page.
 */
export default class TableReader {

  #file

  /**
   * @param {string} file - Path of the PDF to read
   */
  constructor(file) {
    this.#file = file
  }

  /**
   * Converts parsed PDF data into one table per page.
   *
   * The grid is taken from each page's `Fills`: fills that are taller than
   * wide are treated as column boundaries (by x) and fills that are wider
   * than tall as row boundaries (by y). Every entry of `Texts` is then placed
   * in the cell its x/y position falls into; several texts landing in one
   * cell are joined with a space.
   *
   * Texts starting with 'Effective', 'As at' or 'Replaces' are ignored, as are
   * texts that fall outside the grid.
   *
   * @param {object} data - Parser output; `data.Pages[]` with `Fills` and `Texts` is read
   * @returns {string[][][]} For each page, rows of trimmed cell strings, all
   *   padded to the page's widest row. A page without any cells yields [].
   */
  parserCallback(data) {
    const uniqueSorted = values => [...new Set(values)].sort((a, b) => a - b)

    const pages = data.Pages.map(page => {
      const fills = page.Fills
      const colStarts = uniqueSorted(fills.filter(fill => fill.h > fill.w).map(fill => fill.x))
      const rowStarts = uniqueSorted(fills.filter(fill => fill.h < fill.w).map(fill => fill.y))

      const pageData = []
      // With exactly two row boundaries the one-row-up correction below is disabled
      const smallTable = rowStarts.length === 2

      page.Texts.forEach(text => {
        const textContent = decodeURIComponent(text.R[0].T)
        if (textContent.match(/^(Effective|As at|Replaces)/)) return

        // First row boundary below the text; the text belongs to the row ending there
        const firstYGreater = rowStarts.find(r => r > text.y + 0.3)
        const difference = firstYGreater - text.y
        let currentRow = rowStarts.indexOf(firstYGreater) - 1
        // Text sitting more than 0.6 above that boundary is assigned one row further up
        if (!smallTable && difference > 0.6) currentRow--
        if (currentRow < 0) return

        // Column whose right-hand boundary is the first one beyond the text
        const currentCol = colStarts.findIndex(c => c > text.x + 0.4) - 1

        if (!pageData[currentRow]) pageData[currentRow] = []

        // Outside the column grid: a negative index would only create a stray
        // array property that never reaches the output
        if (currentCol < 0) return

        if (!pageData[currentRow][currentCol]) pageData[currentRow][currentCol] = textContent
        else pageData[currentRow][currentCol] += ` ${textContent}`
      })

      // Fill the holes left by rows and cells that received no text
      return Array.from(pageData, row => Array.from(row ?? [], cell => cell || ''))
    })

    return pages.map(page => {
      // Math.max() of nothing is -Infinity, which is not a valid array length
      const maxSize = Math.max(0, ...page.map(row => row.length))
      const blankCells = Array(maxSize).fill('')

      return page.map(row => row.map(g => g.replace(/ +/g, ' ').trim()).concat(blankCells).slice(0, maxSize))
    })
  }

  /**
   * Reads the file and parses it.
   *
   * @returns {Promise<string[][][]>} The tables produced by parserCallback;
   *   rejects when the file cannot be read or the parser reports an error
   */
  async read() {
    const pdfBuffer = await fs.readFile(this.#file)

    return new Promise((resolve, reject) => {
      const pdfParser = new PDFParser()

      pdfParser.on("pdfParser_dataReady", data => {
        try {
          resolve(this.parserCallback(data))
        } catch (err) {
          reject(err)
        }
      })

      pdfParser.on("pdfParser_dataError", err => {
        reject(err)
      })

      pdfParser.parseBuffer(pdfBuffer)
    })
  }

}
@@ -0,0 +1,96 @@
1
+ import fetch from 'node-fetch';
2
+ import constants from './constants.mjs'
3
+ import { load as parseHTML } from 'cheerio'
4
+ import async from 'async'
5
+ import fs from 'fs/promises'
6
+ import path from 'path'
7
+ import { pipeline } from 'stream/promises'
8
+ import { createWriteStream } from 'fs'
9
+ import NSPPDFReader from './nsp-pdf-reader.mjs'
10
+
11
/**
 * One published NSP version together with the files that belong to it.
 */
export class NSPVersion {

  version
  effective

  files = []

  /**
   * @param {string} version - Version identifier
   * @param {Date} effective - Date attached to this version
   */
  constructor(version, effective) {
    this.version = version
    this.effective = effective
  }

  /**
   * @param {object} file - File to add; saveFiles() calls its download(outputDir)
   */
  addFile(file) {
    this.files.push(file)
  }

  /**
   * Downloads every file of this version into outputDir.
   *
   * The directory (including missing parents) is created first; an already
   * existing directory is fine. Any other failure to create it is reported
   * instead of being ignored. Downloads are started together and the first
   * failure rejects the returned promise.
   *
   * @param {string} outputDir - Directory the files are written to
   * @returns {Promise<void>}
   */
  async saveFiles(outputDir) {
    await fs.mkdir(outputDir, { recursive: true })
    await Promise.all(this.files.map(file => file.download(outputDir)))
  }

}
37
+
38
/**
 * A single PDF belonging to an NSP version.
 */
export class NSPFile {

  name
  href

  #nspVersion
  #filePath

  /**
   * @param {string} name - Name used in the saved file name
   * @param {string} href - Link to the PDF, resolved against the corporate host
   * @param {object} nspVersion - Owning version; its `version` prefixes the file name
   */
  constructor(name, href, nspVersion) {
    this.name = name
    this.href = href

    this.#nspVersion = nspVersion
  }

  /**
   * Downloads the PDF to `<outputDir>/<version> <name>.pdf` and remembers that
   * path for extractRuns().
   *
   * @param {string} outputDir - Existing directory to write into
   * @returns {Promise<void>}
   * @throws {Error} When the server answers with a non-success status
   */
  async download(outputDir) {
    // Resolving instead of concatenating also copes with hrefs that are
    // absolute URLs or lack a leading slash
    const url = new URL(this.href, constants.VLINE_CORPORATE_HOST)

    const response = await fetch(url.href)
    if (!response.ok) {
      // Without this check the error page would be saved as a .pdf
      throw new Error(`Failed to download ${this.name} (${url.href}): HTTP ${response.status}`)
    }

    // name originates from a scraped page: keep path separators out of the file name
    const fileName = `${this.#nspVersion.version} ${this.name}.pdf`.replace(/[\\/]/g, '-')
    const filePath = path.join(outputDir, fileName)

    await pipeline(response.body, createWriteStream(filePath))
    this.#filePath = filePath
  }

  /**
   * Points this file at an already downloaded PDF.
   *
   * @param {string} filePath - Path of the PDF on disk
   */
  setFilePath(filePath) {
    this.#filePath = filePath
  }

  /**
   * Parses the PDF and returns all of its runs.
   *
   * @returns {Promise<object[]>} Result of NSPPDFReader#getAllRuns
   * @throws {Error} When no file path is known yet
   */
  async extractRuns() {
    if (!this.#filePath) {
      throw new Error(`No file path for ${this.name}: call download() or setFilePath() first`)
    }

    const reader = new NSPPDFReader(this.#filePath)
    await reader.read()
    return reader.getAllRuns()
  }

}
73
+
74
/**
 * Scrapes the NSP page and groups the published files by version.
 *
 * Only publication links whose text starts with 'FP' and has the form
 * '<version> <day>-<month>-<year> <name>' are used. The file name is the
 * text after the date with everything from ' NSP' onwards removed.
 *
 * @returns {Promise<NSPVersion[]>} Versions sorted by effective date, newest first
 * @throws {Error} When the page request answers with a non-success status
 */
export async function getNSPVersion() {
  const response = await fetch(constants.VLINE_CORPORATE_HOST + constants.NSP_PAGE)
  if (!response.ok) {
    // An error page has no publication list and would otherwise look like "no versions"
    throw new Error(`Failed to fetch NSP page: HTTP ${response.status}`)
  }

  const $ = parseHTML(await response.text())
  const buttons = Array.from($('div#publication-list > a.btn.button-file-link-caption'))

  const nspVersions = new Map()

  for (const button of buttons) {
    const text = $(button).text().trim()
    if (!text.startsWith('FP')) continue

    const data = text.match(/(FP\w+) (\d+)-(\d+)-(\d+) (.+)/)
    if (!data) continue

    const [, version, day, month, year, name] = data

    if (!nspVersions.has(version)) {
      // Two-digit years get a '20' prefix; four-digit years pass through unchanged
      const fullYear = `20${year}`.slice(-4)
      // Zero-pad so the string is always an ISO date: unpadded forms such as
      // '2025-2-2' are not ISO and are parsed in an implementation-defined way
      const isoDate = `${fullYear}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`
      nspVersions.set(version, new NSPVersion(version, new Date(isoDate)))
    }

    const nspVersion = nspVersions.get(version)
    nspVersion.addFile(new NSPFile(name.replace(/ NSP.+/, '').trim(), $(button).attr('href'), nspVersion))
  }

  return [...nspVersions.values()].sort((a, b) => b.effective - a.effective)
}
package/lib.mjs ADDED
@@ -0,0 +1,7 @@
1
// Package entry point: re-exports the public API of lib/vline-nsp.mjs
export { getNSPVersion, NSPFile, NSPVersion } from './lib/vline-nsp.mjs'
package/package.json ADDED
@@ -0,0 +1,23 @@
1
{
  "name": "@transportme/vline-nsp-reader",
  "version": "1.0.0",
  "main": "lib.mjs",
  "scripts": {
    "test": "mocha"
  },
  "author": "",
  "license": "ISC",
  "description": "",
  "dependencies": {
    "async": "^3.2.6",
    "cheerio": "^1.0.0",
    "node-fetch": "^3.3.2",
    "pdf2json": "github:eyeballcode/pdf2json"
  },
  "devDependencies": {
    "chai": "^5.1.2",
    "mocha": "^10.8.2",
    "nock": "^13.5.6",
    "tmp-promise": "^3.0.3"
  }
}
package/read-pdf.mjs ADDED
@@ -0,0 +1,12 @@
1
+ import util from 'util'
2
+ import NSPPDFReader from './lib/nsp-pdf-reader.mjs'
3
+ import TableReader from './lib/table-reader.mjs'
4
+
5
// Debug helper: prints every table extracted from the PDF given as the first
// command-line argument.
//
// Alternative mode (disabled): print the parsed runs instead of the raw tables.
// let nspReader = new NSPPDFReader(process.argv[2])
// await nspReader.read()
// console.log(util.inspect(nspReader.getAllRuns(), { depth: null, colors: true, maxArrayLength: null }))

const pdfPath = process.argv[2]
if (!pdfPath) {
  // Without a path the reader would fail deep inside the file read with an unrelated-looking error
  console.error('Usage: node read-pdf.mjs <path-to-pdf>')
  process.exit(1)
}

const tableReader = new TableReader(pdfPath)
const pages = await tableReader.read()

for (const page of pages) console.table(page)