epg-grabber 0.30.2 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -75,11 +75,12 @@ Arguments:
75
75
 
76
76
  - `-c, --config`: path to config file
77
77
  - `-o, --output`: path to output file or path template (example: `guides/{site}.{lang}.xml`; default: `guide.xml`)
78
- - `--channels`: path to list of channels
78
+ - `--channels`: path to a list of channels; you can also use a wildcard to specify the paths to multiple files at once (example: `example.com_*.channels.xml`)
79
79
  - `--lang`: set default language for all programs (default: `en`)
80
80
  - `--days`: number of days for which to grab the program (default: `1`)
81
81
  - `--delay`: delay between requests in milliseconds (default: `3000`)
82
82
  - `--timeout`: set a timeout for each request in milliseconds (default: `5000`)
83
+ - `--max-connections`: set a limit on the number of concurrent requests per site (default: `1`)
83
84
  - `--cache-ttl`: maximum time for storing each request in milliseconds (default: `0`)
84
85
  - `--gzip`: compress the output (default: `false`)
85
86
  - `--debug`: enable debug mode (default: `false`)
@@ -93,10 +94,11 @@ Arguments:
93
94
  module.exports = {
94
95
  site: 'example.com', // site domain name (required)
95
96
  output: 'example.com.guide.xml', // path to output file or path template (example: 'guides/{site}.{lang}.xml'; default: 'guide.xml')
96
- channels: 'example.com.channels.xml', // path to channels.xml file (required)
97
+ channels: 'example.com.channels.xml', // path to a list of channels; you can also use an array to specify the paths to multiple files at once (example: ['channels1.xml', 'channels2.xml']; required)
97
98
  lang: 'fr', // default language for all programs (default: 'en')
98
99
  days: 3, // number of days for which to grab the program (default: 1)
99
100
  delay: 5000, // delay between requests (default: 3000)
101
+ maxConnections: 200, // limit on the number of concurrent requests (default: 1)
100
102
 
101
103
  request: { // request options (details: https://github.com/axios/axios#request-config)
102
104
 
@@ -12,6 +12,7 @@ const { name, version, description } = require('../package.json')
12
12
  const _ = require('lodash')
13
13
  const dayjs = require('dayjs')
14
14
  const utc = require('dayjs/plugin/utc')
15
+ const { TaskQueue } = require('cwait')
15
16
 
16
17
  dayjs.extend(utc)
17
18
 
@@ -21,11 +22,16 @@ program
21
22
  .description(description)
22
23
  .requiredOption('-c, --config <config>', 'Path to [site].config.js file')
23
24
  .option('-o, --output <output>', 'Path to output file')
24
- .option('--channels <channels>', 'Path to channels.xml file')
25
+ .option('--channels <channels>', 'Path to list of channels')
25
26
  .option('--lang <lang>', 'Set default language for all programs')
26
27
  .option('--days <days>', 'Number of days for which to grab the program', parseNumber)
27
28
  .option('--delay <delay>', 'Delay between requests (in milliseconds)', parseNumber)
28
29
  .option('--timeout <timeout>', 'Set a timeout for each request (in milliseconds)', parseNumber)
30
+ .option(
31
+ '--max-connections <maxConnections>',
32
+ 'Set a limit on the number of concurrent requests per site',
33
+ parseNumber
34
+ )
29
35
  .option(
30
36
  '--cache-ttl <cacheTtl>',
31
37
  'Maximum time for storing each request (in milliseconds)',
@@ -53,22 +59,37 @@ async function main() {
53
59
  curl: options.curl,
54
60
  lang: options.lang,
55
61
  delay: options.delay,
62
+ maxConnections: options.maxConnections,
56
63
  request: {}
57
64
  })
58
65
 
59
66
  if (options.timeout) config.request.timeout = options.timeout
60
67
  if (options.cacheTtl) config.request.cache.ttl = options.cacheTtl
68
+
61
69
  if (options.channels) config.channels = options.channels
62
- else if (config.channels)
63
- config.channels = file.join(file.dirname(options.config), config.channels)
64
- else throw new Error("The required 'channels' property is missing")
65
70
 
66
- if (!config.channels) return logger.error('Path to [site].channels.xml is missing')
67
- logger.info(`Loading '${config.channels}'...`)
68
- const grabber = new EPGGrabber(config)
71
+ let parsedChannels = []
72
+ if (config.channels) {
73
+ const dir = file.dirname(options.config)
69
74
 
70
- const channelsXML = file.read(config.channels)
71
- const { channels: parsedChannels } = parseChannels(channelsXML)
75
+ let files = []
76
+ if (Array.isArray(config.channels)) {
77
+ files = config.channels.map(path => file.join(dir, path))
78
+ } else if (typeof config.channels === 'string') {
79
+ files = await file.list(config.channels)
80
+ } else {
81
+ throw new Error('The "channels" attribute must be of type array or string')
82
+ }
83
+
84
+ for (let filepath of files) {
85
+ logger.info(`Loading '${filepath}'...`)
86
+ const channelsXML = file.read(filepath)
87
+ const { channels } = parseChannels(channelsXML)
88
+ parsedChannels = parsedChannels.concat(channels)
89
+ }
90
+ } else throw new Error('Path to "channels" is missing')
91
+
92
+ const grabber = new EPGGrabber(config)
72
93
 
73
94
  let template = options.output || config.output
74
95
  const variables = file.templateVariables(template)
@@ -89,33 +110,45 @@ async function main() {
89
110
  let programs = []
90
111
  let i = 1
91
112
  let days = config.days || 1
113
+ const maxConnections = config.maxConnections || 1
92
114
  const total = channels.length * days
93
115
  const utcDate = getUTCDate()
94
116
  const dates = Array.from({ length: days }, (_, i) => utcDate.add(i, 'd'))
117
+ const taskQueue = new TaskQueue(Promise, maxConnections)
118
+
119
+ let queue = []
95
120
  for (let channel of channels) {
96
121
  if (!channel.logo && config.logo) {
97
122
  channel.logo = await grabber.loadLogo(channel)
98
123
  }
99
124
 
100
125
  for (let date of dates) {
101
- await grabber
102
- .grab(channel, date, (data, err) => {
103
- logger.info(
104
- `[${i}/${total}] ${config.site} - ${data.channel.xmltv_id} - ${dayjs
105
- .utc(data.date)
106
- .format('MMM D, YYYY')} (${data.programs.length} programs)`
107
- )
108
-
109
- if (err) logger.error(err.message)
110
-
111
- if (i < total) i++
112
- })
113
- .then(results => {
114
- programs = programs.concat(results)
115
- })
126
+ queue.push({ channel, date })
116
127
  }
117
128
  }
118
129
 
130
+ await Promise.all(
131
+ queue.map(
132
+ taskQueue.wrap(async ({ channel, date }) => {
133
+ await grabber
134
+ .grab(channel, date, (data, err) => {
135
+ logger.info(
136
+ `[${i}/${total}] ${config.site} - ${data.channel.xmltv_id} - ${dayjs
137
+ .utc(data.date)
138
+ .format('MMM D, YYYY')} (${data.programs.length} programs)`
139
+ )
140
+
141
+ if (err) logger.error(err.message)
142
+
143
+ if (i < total) i++
144
+ })
145
+ .then(results => {
146
+ programs = programs.concat(results)
147
+ })
148
+ })
149
+ )
150
+ )
151
+
119
152
  programs = _.uniqBy(programs, p => p.start + p.channel)
120
153
 
121
154
  const xml = generateXMLTV({ channels, programs })
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "epg-grabber",
3
- "version": "0.30.2",
3
+ "version": "0.32.0",
4
4
  "description": "Node.js CLI tool for grabbing EPG from different sites",
5
5
  "main": "src/index.js",
6
6
  "preferGlobal": true,
@@ -34,6 +34,7 @@
34
34
  "axios-mock-adapter": "^1.20.0",
35
35
  "commander": "^7.1.0",
36
36
  "curl-generator": "^0.2.0",
37
+ "cwait": "^1.1.2",
37
38
  "dayjs": "^1.10.4",
38
39
  "epg-parser": "^0.1.6",
39
40
  "fs-extra": "^11.1.1",
package/src/client.js CHANGED
@@ -13,125 +13,125 @@ module.exports.parseResponse = parseResponse
13
13
  let timeout
14
14
 
15
15
  function create(config) {
16
- const client = setupCache(
17
- axios.create({
18
- headers: {
19
- 'User-Agent':
20
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71'
21
- }
22
- })
23
- )
24
-
25
- client.interceptors.request.use(
26
- function (request) {
27
- if (config.debug) {
28
- console.log('Request:', JSON.stringify(request, null, 2))
29
- }
30
- return request
31
- },
32
- function (error) {
33
- return Promise.reject(error)
34
- }
35
- )
36
-
37
- client.interceptors.response.use(
38
- function (response) {
39
- if (config.debug) {
40
- const data =
41
- isObject(response.data) || Array.isArray(response.data)
42
- ? JSON.stringify(response.data)
43
- : response.data.toString()
44
- console.log(
45
- 'Response:',
46
- JSON.stringify(
47
- {
48
- headers: response.headers,
49
- data,
50
- cached: response.cached
51
- },
52
- null,
53
- 2
54
- )
55
- )
56
- }
57
-
58
- clearTimeout(timeout)
59
- return response
60
- },
61
- function (error) {
62
- clearTimeout(timeout)
63
- return Promise.reject(error)
64
- }
65
- )
66
-
67
- return client
16
+ const client = setupCache(
17
+ axios.create({
18
+ headers: {
19
+ 'User-Agent':
20
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36 Edg/79.0.309.71'
21
+ }
22
+ })
23
+ )
24
+
25
+ client.interceptors.request.use(
26
+ function (request) {
27
+ if (config.debug) {
28
+ console.log('Request:', JSON.stringify(request, null, 2))
29
+ }
30
+ return request
31
+ },
32
+ function (error) {
33
+ return Promise.reject(error)
34
+ }
35
+ )
36
+
37
+ client.interceptors.response.use(
38
+ function (response) {
39
+ if (config.debug) {
40
+ const data =
41
+ isObject(response.data) || Array.isArray(response.data)
42
+ ? JSON.stringify(response.data)
43
+ : response.data.toString()
44
+ console.log(
45
+ 'Response:',
46
+ JSON.stringify(
47
+ {
48
+ headers: response.headers,
49
+ data,
50
+ cached: response.cached
51
+ },
52
+ null,
53
+ 2
54
+ )
55
+ )
56
+ }
57
+
58
+ clearTimeout(timeout)
59
+ return response
60
+ },
61
+ function (error) {
62
+ clearTimeout(timeout)
63
+ return Promise.reject(error)
64
+ }
65
+ )
66
+
67
+ return client
68
68
  }
69
69
 
70
70
  async function buildRequest({ channel, date, config }) {
71
- const CancelToken = axios.CancelToken
72
- const source = CancelToken.source()
73
- const request = { ...config.request }
74
- timeout = setTimeout(() => {
75
- source.cancel('Connection timeout')
76
- }, request.timeout)
77
- request.headers = await getRequestHeaders({ channel, date, config })
78
- request.url = await getRequestUrl({ channel, date, config })
79
- request.data = await getRequestData({ channel, date, config })
80
- request.cancelToken = source.token
81
-
82
- if (config.curl) {
83
- const curl = CurlGenerator({
84
- url: request.url,
85
- method: request.method,
86
- headers: request.headers,
87
- body: request.data
88
- })
89
- console.log(curl)
90
- }
91
-
92
- return request
71
+ const CancelToken = axios.CancelToken
72
+ const source = CancelToken.source()
73
+ const request = { ...config.request }
74
+ timeout = setTimeout(() => {
75
+ source.cancel('Connection timeout')
76
+ }, request.timeout)
77
+ request.headers = await getRequestHeaders({ channel, date, config })
78
+ request.url = await getRequestUrl({ channel, date, config })
79
+ request.data = await getRequestData({ channel, date, config })
80
+ request.cancelToken = source.token
81
+
82
+ if (config.curl) {
83
+ const curl = CurlGenerator({
84
+ url: request.url,
85
+ method: request.method,
86
+ headers: request.headers,
87
+ body: request.data
88
+ })
89
+ console.log(curl)
90
+ }
91
+
92
+ return request
93
93
  }
94
94
 
95
95
  function parseResponse(response) {
96
- return {
97
- content: response.data.toString(),
98
- buffer: response.data,
99
- headers: response.headers,
100
- request: response.request,
101
- cached: response.cached
102
- }
96
+ return {
97
+ content: response.data.toString(),
98
+ buffer: response.data,
99
+ headers: response.headers,
100
+ request: response.request,
101
+ cached: response.cached
102
+ }
103
103
  }
104
104
 
105
105
  async function getRequestHeaders({ channel, date, config }) {
106
- if (typeof config.request.headers === 'function') {
107
- const headers = config.request.headers({ channel, date })
108
- if (isPromise(headers)) {
109
- return await headers
110
- }
111
- return headers
112
- }
113
-
114
- return config.request.headers || null
106
+ if (typeof config.request.headers === 'function') {
107
+ const headers = config.request.headers({ channel, date })
108
+ if (isPromise(headers)) {
109
+ return await headers
110
+ }
111
+ return headers
112
+ }
113
+
114
+ return config.request.headers || null
115
115
  }
116
116
 
117
117
  async function getRequestData({ channel, date, config }) {
118
- if (typeof config.request.data === 'function') {
119
- const data = config.request.data({ channel, date })
120
- if (isPromise(data)) {
121
- return await data
122
- }
123
- return data
124
- }
125
- return config.request.data || null
118
+ if (typeof config.request.data === 'function') {
119
+ const data = config.request.data({ channel, date })
120
+ if (isPromise(data)) {
121
+ return await data
122
+ }
123
+ return data
124
+ }
125
+ return config.request.data || null
126
126
  }
127
127
 
128
128
  async function getRequestUrl({ channel, date, config }) {
129
- if (typeof config.url === 'function') {
130
- const url = config.url({ channel, date })
131
- if (isPromise(url)) {
132
- return await url
133
- }
134
- return url
135
- }
136
- return config.url
129
+ if (typeof config.url === 'function') {
130
+ const url = config.url({ channel, date })
131
+ if (isPromise(url)) {
132
+ return await url
133
+ }
134
+ return url
135
+ }
136
+ return config.url
137
137
  }
package/src/file.js CHANGED
@@ -1,6 +1,8 @@
1
1
  const fs = require('fs')
2
2
  const path = require('path')
3
+ const glob = require('glob')
3
4
 
5
+ module.exports.list = list
4
6
  module.exports.read = read
5
7
  module.exports.write = write
6
8
  module.exports.resolve = resolve
@@ -9,6 +11,14 @@ module.exports.dirname = dirname
9
11
  module.exports.templateVariables = templateVariables
10
12
  module.exports.templateFormat = templateFormat
11
13
 
14
+ function list(pattern) {
15
+ return new Promise(resolve => {
16
+ glob(pattern, function (err, files) {
17
+ resolve(files)
18
+ })
19
+ })
20
+ }
21
+
12
22
  function read(filepath) {
13
23
  return fs.readFileSync(path.resolve(filepath), { encoding: 'utf-8' })
14
24
  }
@@ -0,0 +1,7 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <site site="example2.com">
3
+ <channels>
4
+ <channel xmltv_id="3TV.com" site_id="3">3 TV</channel>
5
+ <channel xmltv_id="4TV.com" site_id="4">4 TV</channel>
6
+ </channels>
7
+ </site>
@@ -0,0 +1,32 @@
1
+ const dayjs = require('dayjs')
2
+ const utc = require('dayjs/plugin/utc')
3
+
4
+ dayjs.extend(utc)
5
+
6
+ module.exports = {
7
+ site: 'example.com',
8
+ days: 2,
9
+ channels: ['example.channels.xml', 'example_2.channels.xml'],
10
+ output: 'tests/__data__/output/guide.xml',
11
+ url: () => 'http://example.com/20210319/1tv.json',
12
+ request: {
13
+ method: 'POST',
14
+ headers: {
15
+ 'Content-Type': 'application/json',
16
+ Cookie: 'abc=123'
17
+ },
18
+ data() {
19
+ return { accountID: '123' }
20
+ }
21
+ },
22
+ parser: () => {
23
+ return [
24
+ {
25
+ title: 'Program1',
26
+ start: 1640995200000,
27
+ stop: 1640998800000
28
+ }
29
+ ]
30
+ },
31
+ logo: () => 'http://example.com/logos/1TV.png?x=шеллы&sid=777'
32
+ }
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?><tv date="20230709">
2
+ <channel id="1TV.com"><display-name>1 TV</display-name><icon src="https://example.com/logos/1TV.png"/><url>https://example.com</url></channel>
3
+ <channel id="2TV.com"><display-name>2 TV</display-name><icon src="http://example.com/logos/1TV.png?x=шеллы&amp;sid=777"/><url>https://example.com</url></channel>
4
+ <channel id="3TV.com"><display-name>3 TV</display-name><icon src="http://example.com/logos/1TV.png?x=шеллы&amp;sid=777"/><url>https://example2.com</url></channel>
5
+ <channel id="4TV.com"><display-name>4 TV</display-name><icon src="http://example.com/logos/1TV.png?x=шеллы&amp;sid=777"/><url>https://example2.com</url></channel>
6
+ </tv>
package/tests/bin.test.js CHANGED
@@ -105,3 +105,25 @@ it('removes duplicates of the program', () => {
105
105
 
106
106
  expect(output.programs).toEqual(expected.programs)
107
107
  })
108
+
109
+ it('can load multiple "channels.xml" files at once', () => {
110
+ const stdout = execSync(
111
+ `node ${pwd}/bin/epg-grabber.js --config=tests/__data__/input/example.config.js --channels=tests/__data__/input/example*.channels.xml --timeout=1`,
112
+ {
113
+ encoding: 'utf8'
114
+ }
115
+ )
116
+
117
+ expect(stdoutResultTester(stdout)).toBe(true)
118
+ })
119
+
120
+ it('can parse list of "channels.xml" from array', () => {
121
+ const stdout = execSync(
122
+ `node ${pwd}/bin/epg-grabber.js --config=tests/__data__/input/example_channels.config.js --timeout=1`,
123
+ {
124
+ encoding: 'utf8'
125
+ }
126
+ )
127
+
128
+ expect(stdoutResultTester(stdout)).toBe(true)
129
+ })
@@ -1,6 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8" ?><tv date="20230511">
2
- <channel id="1TV.com"><display-name>1 TV</display-name><icon src="https://example.com/logos/1TV.png"/><url>https://example.com</url></channel>
3
- <channel id="2TV.com"><display-name>2 TV</display-name><url>https://example.com</url></channel>
4
- <programme start="20220101000000 +0000" stop="20220101010000 +0000" channel="1TV.com"><title lang="fr">Program1</title></programme>
5
- <programme start="20220101000000 +0000" stop="20220101010000 +0000" channel="2TV.com"><title>Program1</title></programme>
6
- </tv>