@transportme/vline-nsp-reader 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import PDFParser from 'pdf2json'
1
+ import PDFParser from '@transportme/pdf2json'
2
2
  import fs from 'fs/promises'
3
3
 
4
4
  export default class TableReader {
@@ -92,5 +92,5 @@ export async function getNSPVersion() {
92
92
  nspVersion.addFile(new NSPFile(name.replace(/ NSP.+/, '').trim(), $(button).attr('href'), nspVersion))
93
93
  })
94
94
 
95
- return Object.values(nspVersions).sort((a, b) => b.effective - a.effective)
95
+ return Object.values(nspVersions).sort((a, b) => b.effective - a.effective).filter(version => version.files.length > 5)
96
96
  }
@@ -0,0 +1,88 @@
1
+ import PassTableReader from './pass-table-reader.mjs'
2
+
3
/**
 * Turns the tables extracted from a PDF by PassTableReader into a flat list
 * of runs.
 *
 * Each table is read column by column: the first cell of every row is the
 * row label (a station name, or one of the special labels 'Service',
 * 'Service Information' and 'Change Service'), and every further column
 * describes one run.
 */
export default class PassPDFReader {

  #filePath

  /**
   * @param {string} filePath Path of the PDF file, handed to PassTableReader as-is
   */
  constructor(filePath) {
    this.#filePath = filePath
  }

  /**
   * Returns the row labels of a table with a trailing 'arr' / 'dep' marker
   * (optionally letter-spaced, e.g. 'a r r') removed.
   *
   * @param {string[][]} table Rows of cell text
   * @returns {string[]} One label per row; '' for a row without a first cell
   */
  getStations(table) {
    // The table reader can emit completely empty rows, so row[0] may be missing
    return table.map(row => (row[0] || '').replace(/a ?r ?r$/, '').replace(/d ?e ?p$/, '').trim())
  }

  /**
   * Returns the table without its first (label) column.
   *
   * @param {string[][]} table Rows of cell text
   * @returns {string[][]} The same rows, each without its first cell
   */
  getBody(table) {
    return table.map(row => row.slice(1))
  }

  /**
   * Formats a service type cell: the first character is kept as-is and the
   * remainder is lower-cased. An empty cell yields '' (not 'undefined').
   */
  #formatServiceType(text) {
    if (!text) return ''
    return text[0] + text.slice(1).toLowerCase()
  }

  /**
   * Reads the PDF and builds the list of runs.
   *
   * A 'Change Service' cell that holds text closes the run collected so far
   * and starts a new run whose type is taken from that cell. Runs that end up
   * without any stop are dropped.
   *
   * @returns {Promise<object[]>} Runs in the shape produced by setRunData
   */
  async readRuns() {
    const tableReader = new PassTableReader(this.#filePath)
    const tables = await tableReader.read()

    const runs = []

    for (const table of tables) {
      const stations = this.getStations(table)
      const body = this.getBody(table)

      // Rows may have different lengths (or the table may be empty), so size
      // the scan by the widest row instead of relying on the first one
      const columnCount = body.reduce((widest, row) => Math.max(widest, row.length), 0)

      for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        let currentRun = {
          type: '',
          stops: []
        }
        let lastStation = null

        // Flushes the pending stop and stores the run, unless it has no stops
        const finishRun = () => {
          if (lastStation) currentRun.stops.push(lastStation)
          if (currentRun.stops.length) runs.push(currentRun)
          lastStation = null
        }

        for (let stationIndex = 0; stationIndex < stations.length; stationIndex++) {
          const stationName = stations[stationIndex]
          // Only the first '.' is replaced, turning e.g. '08.00' into '08:00'
          const stopData = (body[stationIndex][columnIndex] || '').replace('.', ':')

          if (stationName === 'Service') {
            currentRun.type = this.#formatServiceType(stopData)
            continue
          }
          if (stationName === 'Service Information') continue

          // Empty cells and single-character cells carry no stop data
          if (!stopData || stopData.length === 1) continue

          if (stationName === 'Change Service') {
            finishRun()
            currentRun = {
              type: this.#formatServiceType(stopData),
              stops: []
            }
            continue
          }

          if (lastStation && lastStation.name === stationName) {
            // Second consecutive row with the same label: overrides the departure value
            lastStation.dep = stopData
          } else {
            if (lastStation) currentRun.stops.push(lastStation)
            lastStation = {
              name: stationName,
              arr: stopData,
              dep: stopData
            }
          }
        }

        finishRun()
      }
    }

    return runs.map(run => this.setRunData(run))
  }

  /**
   * Adds summary fields derived from the first and last stop of a run.
   *
   * @param {{ type: string, stops: { name: string, arr: string, dep: string }[] }} run
   *   Run with at least one stop
   * @returns {object} Copy of the run with origin, destination, departureTime
   *   and destinationArrivalTime added
   */
  setRunData(run) {
    const firstStop = run.stops[0]
    const lastStop = run.stops[run.stops.length - 1]

    return {
      ...run,
      origin: firstStop.name,
      destination: lastStop.name,
      departureTime: firstStop.dep,
      destinationArrivalTime: lastStop.arr,
    }
  }

}
@@ -0,0 +1,131 @@
1
+ import PDFParser from '@transportme/pdf2json'
2
+ import fs from 'fs/promises'
3
+
4
/**
 * Extracts tables of text from a PDF using the line and text positions
 * reported by the PDF parser (`Pages[].HLines`, `Pages[].VLines` and
 * `Pages[].Texts`).
 */
export default class PassTableReader {

  #file

  /**
   * @param {string} file Path of the PDF file, passed to fs.readFile
   */
  constructor(file) {
    this.#file = file
  }

  /**
   * Converts parsed PDF data into tables.
   *
   * @param {{ Pages: object[] }} data Parser output
   * @returns {string[][][]} All tables of all pages, in page order; each
   *   table is an array of rows and each row an array of cell strings
   */
  parserCallback(data) {
    return data.Pages.flatMap(page => this.#readPageTables(page))
  }

  /**
   * Finds the tables on a single page.
   *
   * The most frequent vertical line length is taken as the row height; the
   * distinct `y` values of the lines with that length are the row starts.
   * A gap of more than 1.5 row heights between two row starts begins a new table.
   */
  #readPageTables(page) {
    const { HLines, VLines } = page

    // Without vertical lines there is nothing to derive rows from
    if (!VLines.length) return []

    const heightFrequency = VLines.reduce((acc, line) => {
      if (!acc[line.l]) acc[line.l] = 0
      acc[line.l]++
      return acc
    }, {})
    const commonHeight = parseFloat(Object
      .keys(heightFrequency)
      .map(height => ({ height, freq: heightFrequency[height] }))
      .sort((a, b) => b.freq - a.freq)[0].height)

    const rowStarts = [...new Set(VLines
      .filter(line => Math.abs(line.l - commonHeight) < 0.1)
      .map(line => line.y)
    )].sort((a, b) => a - b)

    if (!rowStarts.length) return []

    // Split the row starts into runs of adjacent rows, one run per table
    const tables = [[rowStarts[0]]]
    for (const row of rowStarts.slice(1)) {
      const currTable = tables[tables.length - 1]
      const tableEnd = currTable[currTable.length - 1]
      if (row - tableEnd > commonHeight * 1.5) tables.push([row])
      else currTable.push(row)
    }

    return tables.map(tableRows => this.#readTable(page, HLines, tableRows, commonHeight))
  }

  /**
   * Places every text of the page that falls inside the given table into
   * its row/column cell. Texts sharing a cell are joined with a space.
   */
  #readTable(page, HLines, rowStarts, commonHeight) {
    const tableStart = rowStarts[0]
    const tableEnd = rowStarts[rowStarts.length - 1]

    // Column starts are the distinct x positions of the horizontal lines
    // within the vertical extent of this table
    const colStarts = [...new Set(HLines
      .filter(line => tableStart - 0.1 < line.y && line.y < tableEnd + commonHeight * 1.5)
      .map(line => line.x)
    )].sort((a, b) => a - b)

    const tableData = []

    page.Texts.forEach(text => {
      const textContent = decodeURIComponent(text.R[0].T)

      // A text belongs to the row before the first row start lying more than
      // 0.3 below its y position; texts above the table or with no row start
      // below them are not part of this table.
      // NOTE(review): the 0.3 / 0.4 offsets presumably compensate for how the
      // parser positions text relative to the ruling lines - confirm.
      const currentRow = rowStarts.findIndex(r => r > text.y + 0.3) - 1
      if (currentRow < 0) return

      const currentCol = colStarts.findLastIndex(c => c < text.x + 0.4)

      if (!tableData[currentRow]) tableData[currentRow] = []

      // Text left of the first column has no cell; storing it at index -1
      // would only create a stray array property
      if (currentCol < 0) return

      if (!tableData[currentRow][currentCol]) tableData[currentRow][currentCol] = textContent
      else tableData[currentRow][currentCol] += ` ${textContent}`
    })

    // Replace holes left by rows/cells that received no text
    for (let y = 0; y < tableData.length; y++) {
      if (!tableData[y]) tableData[y] = []
      for (let x = 0; x < tableData[y].length; x++) {
        if (!tableData[y][x]) tableData[y][x] = ''
      }
    }

    return tableData
  }

  /**
   * Reads the PDF file and parses it into tables.
   *
   * @returns {Promise<string[][][]>} Result of parserCallback; rejects when
   *   the file cannot be read, the parser reports an error, or
   *   parserCallback throws
   */
  async read() {
    const pdfBuffer = await fs.readFile(this.#file)

    return new Promise((resolve, reject) => {
      const pdfParser = new PDFParser()

      pdfParser.on("pdfParser_dataReady", data => {
        try {
          resolve(this.parserCallback(data))
        } catch (err) {
          reject(err)
        }
      })

      pdfParser.on("pdfParser_dataError", err => {
        reject(err)
      })

      // A synchronous throw here rejects the promise via the executor
      pdfParser.parseBuffer(pdfBuffer)
    })
  }

}
package/lib.mjs CHANGED
@@ -1,7 +1,9 @@
1
- import { getNSPVersion, NSPFile, NSPVersion } from './lib/vline-nsp.mjs'
1
+ import { getNSPVersion, NSPFile, NSPVersion } from './lib/nsp/vline-nsp.mjs'
2
+ import PassPDFReader from './lib/pass/pass-pdf-reader.mjs'
2
3
 
3
4
  export {
4
5
  getNSPVersion,
5
6
  NSPFile,
6
- NSPVersion
7
+ NSPVersion,
8
+ PassPDFReader
7
9
  }
package/package.json CHANGED
@@ -1,18 +1,18 @@
1
1
  {
2
2
  "name": "@transportme/vline-nsp-reader",
3
- "version": "1.0.2",
3
+ "version": "1.0.4",
4
4
  "main": "lib.mjs",
5
5
  "scripts": {
6
- "test": "mocha"
6
+ "test": "mocha './{,!(node_modules)/**}/*.test.mjs'"
7
7
  },
8
8
  "author": "",
9
9
  "license": "ISC",
10
10
  "description": "",
11
11
  "dependencies": {
12
+ "@transportme/pdf2json": "^4.0.10000004",
12
13
  "async": "^3.2.6",
13
14
  "cheerio": "^1.0.0",
14
- "node-fetch": "^3.3.2",
15
- "pdf2json": "github:eyeballcode/pdf2json"
15
+ "node-fetch": "^3.3.2"
16
16
  },
17
17
  "devDependencies": {
18
18
  "chai": "^5.1.2",
package/read-pdf.mjs CHANGED
@@ -1,12 +1,10 @@
1
- import util from 'util'
2
- import NSPPDFReader from './lib/nsp-pdf-reader.mjs'
3
- import TableReader from './lib/table-reader.mjs'
1
+ import PassTableReader from './lib/pass/pass-table-reader.mjs'
4
2
 
5
3
  // let nspReader = new NSPPDFReader(process.argv[2])
6
4
  // await nspReader.read()
7
5
  // console.log(util.inspect(nspReader.getAllRuns(), { depth: null, colors: true, maxArrayLength: null }))
8
6
 
9
- let tableReader = new TableReader(process.argv[2])
10
- let pages = await tableReader.read()
7
+ let tableReader = new PassTableReader(process.argv[2])
8
+ let tables = await tableReader.read()
11
9
 
12
- for (let page of pages) console.table(page)
10
+ for (let table of tables) console.table(table)
File without changes
File without changes
File without changes