@transportme/vline-nsp-reader 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/heat/constants.mjs +4 -0
- package/lib/heat/vline-heat.mjs +93 -0
- package/lib/pass/pass-pdf-reader.mjs +89 -0
- package/lib/pass/pass-table-reader.mjs +132 -0
- package/lib.mjs +8 -2
- package/package.json +3 -3
- package/read-pdf.mjs +4 -6
- /package/lib/{constants.mjs → nsp/constants.mjs} +0 -0
- /package/lib/{nsp-pdf-reader.mjs → nsp/nsp-pdf-reader.mjs} +0 -0
- /package/lib/{nsp-utils.mjs → nsp/nsp-utils.mjs} +0 -0
- /package/lib/{table-reader.mjs → nsp/table-reader.mjs} +0 -0
- /package/lib/{termini.mjs → nsp/termini.mjs} +0 -0
- /package/lib/{vline-nsp.mjs → nsp/vline-nsp.mjs} +0 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import fetch from 'node-fetch';
|
|
2
|
+
import constants from './constants.mjs'
|
|
3
|
+
import { load as parseHTML } from 'cheerio'
|
|
4
|
+
import async from 'async'
|
|
5
|
+
import fs from 'fs/promises'
|
|
6
|
+
import path from 'path'
|
|
7
|
+
import { pipeline } from 'stream/promises'
|
|
8
|
+
import { createWriteStream } from 'fs'
|
|
9
|
+
import PassPDFReader from '../pass/pass-pdf-reader.mjs'
|
|
10
|
+
|
|
11
|
+
/**
 * A set of extreme-heat timetable files that can be downloaded together.
 */
export class HeatTimetable {

  /** Files registered through addFile(), in insertion order. */
  files = []

  /**
   * Registers a timetable file with this collection.
   *
   * @param {{ download: (outputDir: string) => Promise<void> }} file
   *   Any object exposing an async `download(outputDir)` method.
   */
  addFile(file) {
    this.files.push(file)
  }

  /**
   * Downloads every registered file into `outputDir`.
   *
   * The directory (including missing parents) is created first. An already
   * existing directory is not an error, but any other failure to create it
   * is reported instead of being silently ignored.
   *
   * @param {string} outputDir Directory handed to each file's `download()`.
   * @returns {Promise<void>} Resolves once every download has finished;
   *   rejects with the first download (or mkdir) error.
   */
  async saveFiles(outputDir) {
    // `recursive: true` makes mkdir idempotent, so no blanket catch is needed.
    await fs.mkdir(outputDir, { recursive: true })

    // Downloads are independent of each other, so start them all at once.
    await Promise.all(this.files.map(file => file.download(outputDir)))
  }

}
|
|
32
|
+
|
|
33
|
+
/**
 * A single extreme-heat timetable PDF, identified by its line name and
 * timetable type, that can be downloaded and parsed into runs.
 */
export class HeatTimetableFile {

  line
  type
  href

  // Location of the PDF on disk; set by download() or setFilePath().
  #filePath

  /**
   * @param {string} line Line name, used as the first part of the file name.
   * @param {string} type Timetable type, used as the second part of the file name.
   * @param {string} href Path appended to `constants.VLINE_HOST` when downloading.
   */
  constructor(line, type, href) {
    this.line = line
    this.type = type
    this.href = href
  }

  /**
   * Downloads the PDF to `<outputDir>/<line> - <type>.pdf` and remembers that
   * path for extractRuns().
   *
   * @param {string} outputDir Existing directory to write the file into.
   * @returns {Promise<void>}
   * @throws {Error} When the server answers with a non-2xx status, so an
   *   error page is never saved under a `.pdf` name.
   */
  async download(outputDir) {
    this.#filePath = path.join(outputDir, `${this.line} - ${this.type}.pdf`)

    const response = await fetch(constants.VLINE_HOST + this.href)
    if (!response.ok) {
      throw new Error(`Failed to download ${this.href}: HTTP ${response.status}`)
    }

    await pipeline(response.body, createWriteStream(this.#filePath))
  }

  /**
   * Points this file at a PDF that already exists on disk.
   *
   * @param {string} filePath
   */
  setFilePath(filePath) {
    this.#filePath = filePath
  }

  /**
   * Parses the PDF at the stored file path.
   *
   * @returns {Promise<object[]>} Whatever `PassPDFReader#readRuns()` resolves to.
   */
  async extractRuns() {
    const reader = new PassPDFReader(this.#filePath)
    return reader.readRuns()
  }

  /**
   * Builds a file object from a path named like the ones download() writes
   * (`<line> - <type>.pdf`). The `href` of the result is an empty string.
   *
   * @param {string} pathname Path to an existing PDF.
   * @returns {HeatTimetableFile}
   */
  static fromFile(pathname) {
    // Strip only a trailing ".pdf"; a ".pdf" elsewhere in the name is kept.
    const filename = path.basename(pathname, '.pdf')

    // Split on the first separator only, so a type that itself contains
    // " - " is kept whole. `type` stays undefined when there is no separator.
    const [line, ...typeParts] = filename.split(' - ')
    const type = typeParts.length ? typeParts.join(' - ') : undefined

    const file = new HeatTimetableFile(line, type, '')
    file.setFilePath(pathname)

    return file
  }

}
|
|
75
|
+
|
|
76
|
+
/**
 * Fetches the heat timetable page and collects every timetable PDF link on it.
 *
 * Each matching link caption is normalised and is expected to look like
 * "<line> (<type>)"; captions that do not match are ignored.
 *
 * @returns {Promise<HeatTimetable|null>} The populated timetable, or `null`
 *   when no link produced a file.
 */
export async function getHeatTimetables() {
  const response = await fetch(constants.VLINE_HOST + constants.HEAT_PAGE)
  const html = await response.text()
  const $ = parseHTML(html)

  const links = Array.from($('div.TimeTableHeaderMainContainer > a.button-file-link-caption'))
  const timetable = new HeatTimetable()

  for (const link of links) {
    // Reduce the caption to "<line> (<type>)" one substitution at a time.
    const caption = $(link).text()
      .replace(/PDF.+/, '')
      .replace(' extreme heat timetable', '')
      .replace(/via .+\(/, '(')
      .replace(/ \/ \w+/, '')

    const parts = caption.match(/([\w ]+) \((.+)\)/)
    if (!parts) continue

    const line = parts[1]
    const type = parts[2]
    timetable.addFile(new HeatTimetableFile(line, type, $(link).attr('href')))
  }

  if (timetable.files.length) return timetable
  return null
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import PassTableReader from './pass-table-reader.mjs'
|
|
2
|
+
|
|
3
|
+
/**
 * Turns the tables extracted from a timetable PDF into a flat list of runs.
 *
 * Every table is read column by column: the first cell of each row names the
 * station (or a control row such as "Service"), the remaining cells hold one
 * value per run.
 */
export default class PassPDFReader {

  #filePath

  /**
   * @param {string} filePath Path of the PDF handed to PassTableReader.
   */
  constructor(filePath) {
    this.#filePath = filePath
  }

  /**
   * Returns the row labels of a table with a trailing "arr"/"dep" marker
   * (optionally letter-spaced, e.g. "a r r") removed.
   *
   * @param {string[][]} table
   * @returns {string[]} One label per row; '' for rows without a first cell.
   */
  getStations(table) {
    // Rows can be empty arrays, so the label cell may be missing entirely.
    return table.map(row => (row[0] ?? '').replace(/a ?r ?r$/, '').replace(/d ?e ?p$/, '').trim())
  }

  /**
   * Returns the table without its label column.
   *
   * @param {string[][]} table
   * @returns {string[][]}
   */
  getBody(table) {
    return table.map(row => row.slice(1))
  }

  /**
   * Reads the PDF and builds one run per table column, plus one extra run
   * for every "Change Service" row that splits a column.
   *
   * @returns {Promise<object[]>} Runs as produced by setRunData().
   */
  async readRuns() {
    const tableReader = new PassTableReader(this.#filePath)
    const tables = await tableReader.read()

    const runs = []

    for (const table of tables) {
      const stations = this.getStations(table)
      const body = this.getBody(table)

      // Rows are not padded to a common width, so size the column loop by
      // the widest row rather than by whatever the first row happens to hold.
      const columnCount = body.reduce((widest, row) => Math.max(widest, row.length), 0)

      for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        runs.push(...this.#readColumn(stations, body, columnIndex))
      }
    }

    return runs.map(run => this.setRunData(run))
  }

  /**
   * Adds origin/destination summary fields to a run.
   *
   * @param {{ type: string, stops: { name: string, arr: string, dep: string }[] }} run
   *   A run with at least one stop.
   * @returns {object} Copy of `run` with `origin`, `destination`,
   *   `departureTime` and `destinationArrivalTime` added.
   */
  setRunData(run) {
    const firstStop = run.stops[0]
    const lastStop = run.stops[run.stops.length - 1]

    return {
      ...run,
      origin: firstStop.name,
      destination: lastStop.name,
      departureTime: firstStop.dep,
      destinationArrivalTime: lastStop.arr,
    }
  }

  // Capitalises the first character and lower-cases the rest; '' stays ''.
  #formatServiceType(raw) {
    return raw ? raw[0] + raw.slice(1).toLowerCase() : ''
  }

  // Walks one column top to bottom and returns the run(s) found in it.
  #readColumn(stations, body, columnIndex) {
    const runs = []
    let currentRun = { type: '', stops: [] }

    // The stop currently being assembled; it is only committed to the run
    // once the next station starts, because a station can span two
    // consecutive rows (arrival row followed by departure row).
    let lastStation = null

    for (let stationIndex = 0; stationIndex < stations.length; stationIndex++) {
      const stationName = stations[stationIndex]
      const stopData = (body[stationIndex][columnIndex] || '').replace('.', ':')

      if (stationName === 'Service') {
        currentRun.type = this.#formatServiceType(stopData)
        continue
      }
      if (stationName === 'Service Information') continue

      // Empty cells and single-character placeholders carry no time.
      if (!stopData || stopData.length === 1) continue

      if (stationName === 'Change Service') {
        // A change before any stop has been committed is ignored.
        if (!currentRun.stops.length) continue

        // Close the current run and start a new one of the given type.
        currentRun.stops.push(lastStation)
        runs.push(currentRun)
        lastStation = null
        currentRun = { type: this.#formatServiceType(stopData), stops: [] }
        continue
      }

      if (lastStation && lastStation.name === stationName) {
        // Second row for the same station: it holds the departure time.
        lastStation.dep = stopData
      } else {
        if (lastStation) currentRun.stops.push(lastStation)
        lastStation = { name: stationName, arr: stopData, dep: stopData }
      }
    }

    if (lastStation) currentRun.stops.push(lastStation)
    if (currentRun.stops.length) runs.push(currentRun)

    return runs
  }

}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import PDFParser from '@transportme/pdf2json'
|
|
2
|
+
import fs from 'fs/promises'
|
|
3
|
+
|
|
4
|
+
/**
 * Reads a PDF and reconstructs its tables from the drawn grid lines and the
 * positions of the text fragments.
 */
export default class PassTableReader {

  #file

  /**
   * @param {string} file Path of the PDF to read.
   */
  constructor(file) {
    this.#file = file
  }

  /**
   * Converts parsed PDF data into tables.
   *
   * For every page the most frequent vertical-line length (`l`) is treated
   * as the height of a table row. Vertical lines of that length give the
   * row positions, a large vertical gap between rows starts a new table,
   * horizontal lines inside a table give the column positions, and each
   * text fragment is placed in the cell its coordinates fall into.
   *
   * @param {{ Pages: { HLines: object[], VLines: object[], Texts: object[] }[] }} data
   *   Parser output. Reads `l`/`y` of vertical lines, `x`/`y` of horizontal
   *   lines and `x`/`y`/`R[0].T` of texts.
   * @returns {string[][][]} Tables of all pages in page order; each table is
   *   a list of rows of cell strings. Rows are not padded to equal width.
   */
  parserCallback(data) {
    return data.Pages.flatMap(page => {
      const { HLines, VLines } = page

      // Without vertical lines there is no grid to recover on this page.
      if (!VLines.length) return []

      const heightFrequency = VLines.reduce((acc, line) => {
        acc[line.l] = (acc[line.l] || 0) + 1
        return acc
      }, {})
      const [mostCommonHeight] = Object
        .entries(heightFrequency)
        .sort(([, freqA], [, freqB]) => freqB - freqA)[0]
      const commonHeight = parseFloat(mostCommonHeight)

      const rowStarts = [...new Set(VLines
        .filter(line => Math.abs(line.l - commonHeight) < 0.1)
        .map(line => line.y))]
        .sort((a, b) => a - b)

      // Group consecutive rows into tables; a gap of more than one and a
      // half row heights means the next row belongs to a new table.
      const tables = [[rowStarts[0]]]
      for (const row of rowStarts.slice(1)) {
        const currTable = tables[tables.length - 1]
        const previousRow = currTable[currTable.length - 1]
        if (row - previousRow > commonHeight * 1.5) tables.push([row])
        else currTable.push(row)
      }

      return tables.map(tableRows => {
        const tableStart = tableRows[0]
        const tableEnd = tableRows[tableRows.length - 1]

        const colStarts = [...new Set(HLines
          .filter(line => tableStart - 0.1 < line.y && line.y < tableEnd + commonHeight * 1.5)
          .map(line => line.x))]
          .sort((a, b) => a - b)

        const tableData = []

        page.Texts.forEach(text => {
          // Ignore text outside this table's vertical extent.
          if (text.y < tableStart - commonHeight || text.y > tableEnd + commonHeight * 1.5) return

          const textContent = decodeURIComponent(text.R[0].T)

          // The row is the one just before the first row line below the
          // text. NOTE(review): a negative result (no row line below, or
          // the first row line is already below) is mapped to the LAST
          // row, as in the original code - confirm that is intended for
          // text sitting above the first row line.
          let currentRow = tableRows.findIndex(r => r > text.y + 0.3) - 1
          if (currentRow < 0) currentRow = tableRows.length - 1

          const currentCol = colStarts.findLastIndex(c => c < text.x + 0.4)

          if (!tableData[currentRow]) tableData[currentRow] = []

          // Text left of every column line has no cell; drop it instead of
          // writing to index -1, which would not be a real array element.
          if (currentCol < 0) return

          if (!tableData[currentRow][currentCol]) tableData[currentRow][currentCol] = textContent
          else tableData[currentRow][currentCol] += ` ${textContent}`
        })

        // Replace holes left by rows/cells that received no text.
        for (let y = 0; y < tableData.length; y++) {
          if (!tableData[y]) tableData[y] = []
          for (let x = 0; x < tableData[y].length; x++) {
            if (!tableData[y][x]) tableData[y][x] = ''
          }
        }

        return tableData
      })
    })
  }

  /**
   * Loads the PDF from disk, parses it and extracts its tables.
   *
   * @returns {Promise<string[][][]>} Result of parserCallback(); rejects if
   *   the file cannot be read, the parser reports an error, or table
   *   extraction throws.
   */
  async read() {
    const pdfBuffer = await fs.readFile(this.#file)

    return new Promise((resolve, reject) => {
      const pdfParser = new PDFParser()

      pdfParser.on("pdfParser_dataReady", data => {
        try {
          resolve(this.parserCallback(data))
        } catch (err) {
          reject(err)
        }
      })

      pdfParser.on("pdfParser_dataError", err => {
        reject(err)
      })

      pdfParser.parseBuffer(pdfBuffer)
    })
  }

}
|
package/lib.mjs
CHANGED
|
@@ -1,7 +1,13 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { getHeatTimetables, HeatTimetable, HeatTimetableFile } from './lib/heat/vline-heat.mjs'
|
|
2
|
+
import { getNSPVersion, NSPFile, NSPVersion } from './lib/nsp/vline-nsp.mjs'
|
|
3
|
+
import PassPDFReader from './lib/pass/pass-pdf-reader.mjs'
|
|
2
4
|
|
|
3
5
|
export {
|
|
4
6
|
getNSPVersion,
|
|
7
|
+
getHeatTimetables,
|
|
5
8
|
NSPFile,
|
|
6
|
-
NSPVersion
|
|
9
|
+
NSPVersion,
|
|
10
|
+
PassPDFReader,
|
|
11
|
+
HeatTimetable,
|
|
12
|
+
HeatTimetableFile
|
|
7
13
|
}
|
package/package.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@transportme/vline-nsp-reader",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.5",
|
|
4
4
|
"main": "lib.mjs",
|
|
5
5
|
"scripts": {
|
|
6
|
-
"test": "mocha"
|
|
6
|
+
"test": "mocha './{,!(node_modules)/**}/*.test.mjs'"
|
|
7
7
|
},
|
|
8
8
|
"author": "",
|
|
9
9
|
"license": "ISC",
|
|
10
10
|
"description": "",
|
|
11
11
|
"dependencies": {
|
|
12
|
-
"@transportme/pdf2json": "^4.0.
|
|
12
|
+
"@transportme/pdf2json": "^4.0.10000004",
|
|
13
13
|
"async": "^3.2.6",
|
|
14
14
|
"cheerio": "^1.0.0",
|
|
15
15
|
"node-fetch": "^3.3.2"
|
package/read-pdf.mjs
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
|
-
import
|
|
2
|
-
import NSPPDFReader from './lib/nsp-pdf-reader.mjs'
|
|
3
|
-
import TableReader from './lib/table-reader.mjs'
|
|
1
|
+
import PassTableReader from './lib/pass/pass-table-reader.mjs'
|
|
4
2
|
|
|
5
3
|
// let nspReader = new NSPPDFReader(process.argv[2])
|
|
6
4
|
// await nspReader.read()
|
|
7
5
|
// console.log(util.inspect(nspReader.getAllRuns(), { depth: null, colors: true, maxArrayLength: null }))
|
|
8
6
|
|
|
9
|
-
let tableReader = new
|
|
10
|
-
let
|
|
7
|
+
let tableReader = new PassTableReader(process.argv[2])
|
|
8
|
+
let tables = await tableReader.read()
|
|
11
9
|
|
|
12
|
-
for (let
|
|
10
|
+
for (let table of tables) console.table(table)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|