@transportme/vline-nsp-reader 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
/**
 * Shared V/Line website constants.
 *
 * The object is frozen so that a module importing it cannot accidentally
 * mutate values that every other importer also sees.
 */
const constants = Object.freeze({
  // Scheme + host that page and file paths are resolved against.
  VLINE_HOST: 'https://vline.com.au',
  // Path (relative to VLINE_HOST) of the heat timetable page.
  HEAT_PAGE: '/Timetables/heat'
})

export default constants
@@ -0,0 +1,93 @@
1
+ import fetch from 'node-fetch';
2
+ import constants from './constants.mjs'
3
+ import { load as parseHTML } from 'cheerio'
4
+ import async from 'async'
5
+ import fs from 'fs/promises'
6
+ import path from 'path'
7
+ import { pipeline } from 'stream/promises'
8
+ import { createWriteStream } from 'fs'
9
+ import PassPDFReader from '../pass/pass-pdf-reader.mjs'
10
+
11
/**
 * A collection of heat timetable files that can be downloaded together.
 */
export class HeatTimetable {

  /** Files registered through addFile(), in insertion order. */
  files = []

  /**
   * Registers a file with this timetable.
   *
   * @param {object} file - any object exposing an async `download(outputDir)`
   *   method (saveFiles() calls nothing else on it).
   */
  addFile(file) {
    this.files.push(file)
  }

  /**
   * Ensures `outputDir` exists, then downloads every registered file into it.
   * All downloads are started together and awaited as a group.
   *
   * @param {string} outputDir - directory to write into; missing parent
   *   directories are created as well.
   * @returns {Promise<void>} resolves once every download has finished.
   * @throws rejects if the directory cannot be created or any download fails.
   */
  async saveFiles(outputDir) {
    // `recursive: true` makes an already-existing directory a no-op, so real
    // failures (permissions, a file in the way, ...) are no longer swallowed.
    await fs.mkdir(outputDir, { recursive: true })

    await Promise.all(this.files.map(file => file.download(outputDir)))
  }

}
32
+
33
/**
 * A single heat timetable PDF, identified by the line it covers and a
 * timetable type, together with the link it can be downloaded from.
 */
export class HeatTimetableFile {

  line
  type
  href

  // Location of the PDF on disk; set by download(), setFilePath() or fromFile().
  #filePath

  /**
   * @param {string} line - line name, used in the saved file name.
   * @param {string} type - timetable type, used in the saved file name.
   * @param {string} href - link to the PDF, resolved against constants.VLINE_HOST.
   */
  constructor(line, type, href) {
    this.line = line
    this.type = type
    this.href = href
  }

  /**
   * Downloads the PDF to `<outputDir>/<line> - <type>.pdf`.
   *
   * @param {string} outputDir - existing directory to write the file into.
   * @returns {Promise<void>}
   * @throws {Error} if the server answers with a non-2xx status; nothing is
   *   written to disk in that case.
   */
  async download(outputDir) {
    const filePath = path.join(outputDir, `${this.line} - ${this.type}.pdf`)

    // Resolving through URL handles both site-relative and absolute hrefs.
    const url = new URL(this.href, constants.VLINE_HOST).href

    const response = await fetch(url)
    if (!response.ok) {
      // Without this check an HTML error page would be saved as a ".pdf".
      throw new Error(`Failed to download ${url}: HTTP ${response.status} ${response.statusText}`)
    }

    await pipeline(response.body, createWriteStream(filePath))

    // Only remember the path once the file has actually been written.
    this.#filePath = filePath
  }

  /**
   * Points this object at a PDF that already exists on disk.
   *
   * @param {string} filePath
   */
  setFilePath(filePath) {
    this.#filePath = filePath
  }

  /**
   * Reads the runs out of the PDF at the current file path.
   *
   * @returns {Promise<object[]>} whatever PassPDFReader#readRuns() resolves to.
   */
  async extractRuns() {
    const reader = new PassPDFReader(this.#filePath)
    return await reader.readRuns()
  }

  /**
   * Builds an instance from a file named `<line> - <type>.pdf`.
   * The resulting object has an empty href and its file path set to `pathname`.
   *
   * @param {string} pathname - path to the PDF on disk.
   * @returns {HeatTimetableFile}
   */
  static fromFile(pathname) {
    // basename(p, ext) strips only a trailing ".pdf", never one mid-name.
    const [line, type] = path.basename(pathname, '.pdf').split(' - ')
    const file = new HeatTimetableFile(line, type, '')
    file.setFilePath(pathname)

    return file
  }

}
75
+
76
/**
 * Fetches the heat timetable page and collects every timetable link on it.
 *
 * Each matching link caption is reduced to the form "<line> (<type>)"; links
 * whose caption does not fit that shape are ignored.
 *
 * @returns {Promise<HeatTimetable|null>} a timetable holding one
 *   HeatTimetableFile per recognised link, or null when none was recognised.
 */
export async function getHeatTimetables() {
  const response = await fetch(constants.VLINE_HOST + constants.HEAT_PAGE)
  const $ = parseHTML(await response.text())

  const timetable = new HeatTimetable()
  const links = Array.from($('div.TimeTableHeaderMainContainer > a.button-file-link-caption'))

  for (const link of links) {
    // Strip the trailing "PDF..." text, the fixed suffix, any "via ..." part
    // and a " / word" alternative so only "<line> (<type>)" is left.
    const caption = $(link).text()
      .replace(/PDF.+/, '')
      .replace(' extreme heat timetable', '')
      .replace(/via .+\(/, '(')
      .replace(/ \/ \w+/, '')

    const match = caption.match(/([\w ]+) \((.+)\)/)
    if (match === null) continue

    const [, line, type] = match
    timetable.addFile(new HeatTimetableFile(line, type, $(link).attr('href')))
  }

  if (timetable.files.length === 0) return null
  return timetable
}
@@ -0,0 +1,89 @@
1
+ import PassTableReader from './pass-table-reader.mjs'
2
+
3
/**
 * Turns the tables read from a timetable PDF into a list of runs.
 *
 * Each table is treated as: first column = row label (station name or one of
 * the special labels "Service", "Service Information", "Change Service"),
 * every further column = one service.
 */
export default class PassPDFReader {

  #filePath

  /**
   * @param {string} filePath - path of the PDF handed to PassTableReader.
   */
  constructor(filePath) {
    this.#filePath = filePath
  }

  /**
   * Returns the row labels of a table with a trailing "arr"/"dep" marker
   * (optionally letter-spaced, e.g. "a r r") removed and whitespace trimmed.
   * Rows without a first cell yield an empty label.
   *
   * @param {string[][]} table
   * @returns {string[]}
   */
  getStations(table) {
    // Rows can be empty arrays, so fall back to '' instead of crashing.
    return table.map(row => (row[0] ?? '').replace(/a ?r ?r$/, '').replace(/d ?e ?p$/, '').trim())
  }

  /**
   * Returns every row of the table without its first (label) cell.
   *
   * @param {string[][]} table
   * @returns {string[][]}
   */
  getBody(table) {
    return table.map(row => row.slice(1))
  }

  /**
   * Reads all tables from the PDF and converts every column into runs.
   *
   * @returns {Promise<object[]>} runs shaped as
   *   `{ type, stops: [{ name, arr, dep }], origin, destination,
   *      departureTime, destinationArrivalTime }`.
   */
  async readRuns() {
    const tableReader = new PassTableReader(this.#filePath)
    const tables = await tableReader.read()

    const runs = []

    for (const table of tables) {
      const stations = this.getStations(table)
      const body = this.getBody(table)

      // Rows may be ragged, so size the scan by the widest row rather than
      // the first one; an empty table simply contributes no columns.
      const columnCount = body.reduce((widest, row) => Math.max(widest, row.length), 0)

      for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        runs.push(...this.#readColumn(stations, body, columnIndex))
      }
    }

    return runs.map(run => this.setRunData(run))
  }

  /**
   * Walks one table column top to bottom and splits it into runs.
   * A new run starts whenever a non-empty "Change Service" cell is met after
   * at least one stop has been recorded.
   */
  #readColumn(stations, body, columnIndex) {
    const runs = []
    let currentRun = { type: '', stops: [] }
    let lastStation = null

    for (let stationIndex = 0; stationIndex < stations.length; stationIndex++) {
      const stationName = stations[stationIndex]
      const stopData = (body[stationIndex][columnIndex] || '').replace('.', ':')

      if (stationName === 'Service') {
        currentRun.type = this.#formatServiceType(stopData)
        continue
      }
      if (stationName === 'Service Information') continue

      // Empty cells and single-character cells carry no time.
      if (!stopData || stopData.length === 1) continue

      if (stationName === 'Change Service') {
        if (!currentRun.stops.length) continue

        // Close the current run and open a new one of the announced type.
        currentRun.stops.push(lastStation)
        runs.push(currentRun)
        lastStation = null
        currentRun = { type: this.#formatServiceType(stopData), stops: [] }
        continue
      }

      if (lastStation && lastStation.name === stationName) {
        // Second row for the same station: this one is the departure.
        lastStation.dep = stopData
      } else {
        if (lastStation) currentRun.stops.push(lastStation)
        lastStation = { name: stationName, arr: stopData, dep: stopData }
      }
    }

    if (lastStation) currentRun.stops.push(lastStation)
    if (currentRun.stops.length) runs.push(currentRun)

    return runs
  }

  /**
   * Capitalises the first character and lower-cases the rest ("TRAIN" -> "Train").
   * An empty cell stays empty instead of becoming the string "undefined".
   */
  #formatServiceType(text) {
    if (!text) return ''
    return text[0] + text.slice(1).toLowerCase()
  }

  /**
   * Adds summary fields derived from the first and last stop of a run.
   *
   * @param {{ type: string, stops: { name: string, arr: string, dep: string }[] }} run
   *   a run with at least one stop.
   * @returns {object} the run plus origin, destination, departureTime and
   *   destinationArrivalTime.
   */
  setRunData(run) {
    const firstStop = run.stops[0]
    const lastStop = run.stops[run.stops.length - 1]

    return {
      ...run,
      origin: firstStop.name,
      destination: lastStop.name,
      departureTime: firstStop.dep,
      destinationArrivalTime: lastStop.arr,
    }
  }

}
@@ -0,0 +1,132 @@
1
+ import PDFParser from '@transportme/pdf2json'
2
+ import fs from 'fs/promises'
3
+
4
/**
 * Reconstructs text tables from a PDF by using the page's drawn lines as the
 * cell grid and dropping each text fragment into the cell it falls in.
 */
export default class PassTableReader {

  #file

  /**
   * @param {string} file - path of the PDF to read.
   */
  constructor(file) {
    this.#file = file
  }

  /**
   * Converts parsed PDF data into tables of cell strings.
   *
   * @param {{ Pages: { HLines: object[], VLines: object[], Texts: object[] }[] }} data
   *   parser output; lines are read through their `x`, `y`, `l` fields and
   *   texts through `x`, `y` and `R[0].T` (URI-encoded).
   * @returns {string[][][]} a flat list of tables across all pages, each a
   *   list of rows of cell strings. Rows may differ in length.
   */
  parserCallback(data) {
    return data.Pages.flatMap(page => {
      const { HLines, VLines } = page

      // Without vertical lines there is no grid to infer tables from.
      if (!VLines.length) return []

      // The most frequent vertical line length is taken as the row height.
      const heightFrequency = {}
      for (const line of VLines) {
        heightFrequency[line.l] = (heightFrequency[line.l] || 0) + 1
      }

      let commonHeight = NaN
      let highestFreq = 0
      for (const [height, freq] of Object.entries(heightFrequency)) {
        // Strict ">" keeps the first key on ties.
        if (freq > highestFreq) {
          highestFreq = freq
          commonHeight = parseFloat(height)
        }
      }

      // Distinct y positions of row-height lines, top to bottom.
      const rowStarts = [...new Set(
        VLines
          .filter(line => Math.abs(line.l - commonHeight) < 0.1)
          .map(line => line.y)
      )].sort((a, b) => a - b)

      if (!rowStarts.length) return []

      // A vertical gap of more than 1.5 row heights starts a new table.
      const tables = [[rowStarts[0]]]
      for (const row of rowStarts.slice(1)) {
        const currTable = tables[tables.length - 1]
        const tableEnd = currTable[currTable.length - 1]
        if (row - tableEnd > commonHeight * 1.5) tables.push([row])
        else currTable.push(row)
      }

      return tables.map(tableRows => {
        const tableStart = tableRows[0]
        const tableEnd = tableRows[tableRows.length - 1]

        // Distinct x positions of horizontal lines inside the table, left to right.
        const colStarts = [...new Set(
          HLines
            .filter(line => tableStart - 0.1 < line.y && line.y < tableEnd + commonHeight * 1.5)
            .map(line => line.x)
        )].sort((a, b) => a - b)

        const tableData = []

        for (const text of page.Texts) {
          if (text.y < tableStart - commonHeight || text.y > tableEnd + commonHeight * 1.5) continue

          const textContent = decodeURIComponent(text.R[0].T)

          // NOTE(review): text more than 0.3 above the first row start also
          // ends up at -1 here and is therefore placed in the last row —
          // confirm whether that is intended.
          let currentRow = tableRows.findIndex(r => r > text.y + 0.3) - 1
          if (currentRow < 0) currentRow = tableRows.length - 1

          const currentCol = colStarts.findLastIndex(c => c < text.x + 0.4)

          if (!tableData[currentRow]) tableData[currentRow] = []

          // Text left of the first column belongs to no cell; skip it rather
          // than storing it under the array property "-1".
          if (currentCol < 0) continue

          if (!tableData[currentRow][currentCol]) tableData[currentRow][currentCol] = textContent
          else tableData[currentRow][currentCol] += ` ${textContent}`
        }

        // Replace holes left by empty rows/cells with [] / ''.
        for (let y = 0; y < tableData.length; y++) {
          if (!tableData[y]) tableData[y] = []
          for (let x = 0; x < tableData[y].length; x++) {
            if (!tableData[y][x]) tableData[y][x] = ''
          }
        }

        return tableData
      })
    })
  }

  /**
   * Reads the PDF from disk, parses it and returns its tables.
   *
   * @returns {Promise<string[][][]>} the result of parserCallback().
   * @throws rejects if the file cannot be read, the parser reports an error,
   *   or parserCallback() throws.
   */
  async read() {
    const pdfBuffer = await fs.readFile(this.#file)

    // The executor is synchronous, so anything thrown while wiring up the
    // parser rejects the promise instead of leaving it pending forever.
    return new Promise((resolve, reject) => {
      const pdfParser = new PDFParser()

      pdfParser.on("pdfParser_dataReady", data => {
        try {
          resolve(this.parserCallback(data))
        } catch (err) {
          reject(err)
        }
      })

      pdfParser.on("pdfParser_dataError", err => {
        reject(err)
      })

      pdfParser.parseBuffer(pdfBuffer)
    })
  }

}
package/lib.mjs CHANGED
@@ -1,7 +1,13 @@
1
- import { getNSPVersion, NSPFile, NSPVersion } from './lib/vline-nsp.mjs'
1
+ import { getHeatTimetables, HeatTimetable, HeatTimetableFile } from './lib/heat/vline-heat.mjs'
2
+ import { getNSPVersion, NSPFile, NSPVersion } from './lib/nsp/vline-nsp.mjs'
3
+ import PassPDFReader from './lib/pass/pass-pdf-reader.mjs'
2
4
 
3
5
  export {
4
6
  getNSPVersion,
7
+ getHeatTimetables,
5
8
  NSPFile,
6
- NSPVersion
9
+ NSPVersion,
10
+ PassPDFReader,
11
+ HeatTimetable,
12
+ HeatTimetableFile
7
13
  }
package/package.json CHANGED
@@ -1,15 +1,15 @@
1
1
  {
2
2
  "name": "@transportme/vline-nsp-reader",
3
- "version": "1.0.3",
3
+ "version": "1.0.5",
4
4
  "main": "lib.mjs",
5
5
  "scripts": {
6
- "test": "mocha"
6
+ "test": "mocha './{,!(node_modules)/**}/*.test.mjs'"
7
7
  },
8
8
  "author": "",
9
9
  "license": "ISC",
10
10
  "description": "",
11
11
  "dependencies": {
12
- "@transportme/pdf2json": "^4.0.10000002",
12
+ "@transportme/pdf2json": "^4.0.10000004",
13
13
  "async": "^3.2.6",
14
14
  "cheerio": "^1.0.0",
15
15
  "node-fetch": "^3.3.2"
package/read-pdf.mjs CHANGED
@@ -1,12 +1,10 @@
1
- import util from 'util'
2
- import NSPPDFReader from './lib/nsp-pdf-reader.mjs'
3
- import TableReader from './lib/table-reader.mjs'
1
+ import PassTableReader from './lib/pass/pass-table-reader.mjs'
4
2
 
5
3
  // let nspReader = new NSPPDFReader(process.argv[2])
6
4
  // await nspReader.read()
7
5
  // console.log(util.inspect(nspReader.getAllRuns(), { depth: null, colors: true, maxArrayLength: null }))
8
6
 
9
- let tableReader = new TableReader(process.argv[2])
10
- let pages = await tableReader.read()
7
+ let tableReader = new PassTableReader(process.argv[2])
8
+ let tables = await tableReader.read()
11
9
 
12
- for (let page of pages) console.table(page)
10
+ for (let table of tables) console.table(table)
File without changes
File without changes
File without changes
File without changes
File without changes