canada-api 5.1.4 → 5.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +277 -277
- package/dist/ca.js +1 -1
- package/package.json +44 -44
- package/src/children.js +48 -48
- package/src/content.js +20 -20
- package/src/index.js +24 -24
- package/src/meta.js +102 -102
- package/src/normalize.js +37 -37
- package/src/request.js +56 -39
package/README.md
CHANGED
|
@@ -1,277 +1,277 @@
|
|
|
1
|
-
([Français](#canada-api-1))
|
|
2
|
-
|
|
3
|
-
# canada-api
|
|
4
|
-
|
|
5
|
-
[](https://www.npmjs.com/package/canada-api) [](https://github.com/dnd-mdn/canada-api/blob/main/LICENSE.md)
|
|
6
|
-
|
|
7
|
-
Cross platform API for fetching public data from [canada.ca](https://www.canada.ca).
|
|
8
|
-
|
|
9
|
-
## Browser
|
|
10
|
-
|
|
11
|
-
```html
|
|
12
|
-
<script src="https://cdn.jsdelivr.net/npm/canada-api@5.1.4"></script>
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
## Node 18+
|
|
16
|
-
|
|
17
|
-
### Install
|
|
18
|
-
|
|
19
|
-
```shell
|
|
20
|
-
npm install canada-api
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
### Usage
|
|
24
|
-
|
|
25
|
-
```js
|
|
26
|
-
import ca from 'canada-api'
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
## Testing
|
|
30
|
-
|
|
31
|
-
```shell
|
|
32
|
-
npm test
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
Tests use the built-in Node.js test runner (`node:test`) and require Node 18 or later.
|
|
36
|
-
|
|
37
|
-
## API
|
|
38
|
-
|
|
39
|
-
### `ca.normalize(url)`
|
|
40
|
-
|
|
41
|
-
- `url` {string|URL} - Full URL or relative path (e.g. `'/en/page'` or `'https://www.canada.ca/en/page'`)
|
|
42
|
-
- Returns: {URL} Normalized URL object with cleaned pathname
|
|
43
|
-
|
|
44
|
-
Validates and normalizes a canada.ca URL. Strips the `/content/canadasite` prefix, file extensions, and trailing slashes.
|
|
45
|
-
|
|
46
|
-
Throws {TypeError} if `url` is not a string or URL object.
|
|
47
|
-
Throws {Error} if the URL is not on canada.ca or the path does not start with `/en/` or `/fr/`.
|
|
48
|
-
|
|
49
|
-
### `ca.children(url)`
|
|
50
|
-
|
|
51
|
-
- `url` {string|URL} - Absolute or relative URL
|
|
52
|
-
- Returns: {Promise} Fulfills with a response whose `data` is an array of sitemap entries
|
|
53
|
-
|
|
54
|
-
Fetches and parses the sitemap for the given page, returning its child pages. Entries without a `<loc>` element are skipped.
|
|
55
|
-
|
|
56
|
-
```json
|
|
57
|
-
{
|
|
58
|
-
"data": [
|
|
59
|
-
{
|
|
60
|
-
"path": "/en/department-national-defence/maple-leaf",
|
|
61
|
-
"lastmod": "2022-09-20T00:00:00.000Z"
|
|
62
|
-
}
|
|
63
|
-
],
|
|
64
|
-
"status": 200,
|
|
65
|
-
"statusText": "OK",
|
|
66
|
-
"headers": {
|
|
67
|
-
"content-type": "text/xml"
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
### `ca.content(url)`
|
|
73
|
-
|
|
74
|
-
- `url` {string|URL} - Absolute or relative URL
|
|
75
|
-
- Returns: {Promise} Fulfills with a response whose `data` is the raw HTML string
|
|
76
|
-
|
|
77
|
-
Retrieves the HTML content of the page.
|
|
78
|
-
|
|
79
|
-
```json
|
|
80
|
-
{
|
|
81
|
-
"data": "<!DOCTYPE html>...",
|
|
82
|
-
"status": 200,
|
|
83
|
-
"statusText": "OK",
|
|
84
|
-
"headers": {
|
|
85
|
-
"content-type": "text/html"
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
```
|
|
89
|
-
|
|
90
|
-
### `ca.meta(url)`
|
|
91
|
-
|
|
92
|
-
- `url` {string|URL} - Absolute or relative URL
|
|
93
|
-
- Returns: {Promise} Fulfills with a response whose `data` is a formatted metadata object
|
|
94
|
-
|
|
95
|
-
Fetches JCR metadata for the given page. The following transformations are applied:
|
|
96
|
-
|
|
97
|
-
- String `"true"` / `"false"` values are converted to booleans
|
|
98
|
-
- `@TypeHint` properties are removed
|
|
99
|
-
- Empty arrays are removed
|
|
100
|
-
- Date strings are converted to ISO 8601
|
|
101
|
-
- Keys are sorted alphabetically
|
|
102
|
-
- A normalized `peer` field is added when `gcAltLanguagePeer` is present
|
|
103
|
-
|
|
104
|
-
```json
|
|
105
|
-
{
|
|
106
|
-
"data": {
|
|
107
|
-
"cq:lastModified": "2022-10-25T19:16:28.000Z",
|
|
108
|
-
"fluidWidth": false,
|
|
109
|
-
"peer": "/fr/ministere-defense-nationale/feuille-erable"
|
|
110
|
-
},
|
|
111
|
-
"status": 200,
|
|
112
|
-
"statusText": "OK",
|
|
113
|
-
"headers": {
|
|
114
|
-
"content-type": "application/json"
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
### `ca.request`
|
|
120
|
-
|
|
121
|
-
- `url` {string|URL} - Absolute or relative URL
|
|
122
|
-
- `options` {RequestInit} - Optional fetch options
|
|
123
|
-
- Returns: {Promise} Fulfills with a response object
|
|
124
|
-
|
|
125
|
-
Raw HTTP client with `https://www.canada.ca` as the base URL. Use this for any requests not covered by the methods above. No URL transformation is applied. Response bodies with a `application/json` content type are automatically parsed.
|
|
126
|
-
|
|
127
|
-
```js
|
|
128
|
-
const response = await ca.request('/en/department-national-defence.html');
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
All methods return the same response shape:
|
|
132
|
-
|
|
133
|
-
```json
|
|
134
|
-
{
|
|
135
|
-
"data": "...",
|
|
136
|
-
"status": 200,
|
|
137
|
-
"statusText": "OK",
|
|
138
|
-
"headers": {
|
|
139
|
-
"content-type": "text/html"
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
```
|
|
143
|
-
|
|
144
|
-
---
|
|
145
|
-
|
|
146
|
-
# canada-api
|
|
147
|
-
|
|
148
|
-
[](https://www.npmjs.com/package/canada-api) [](https://github.com/dnd-mdn/canada-api/blob/main/LICENSE.md)
|
|
149
|
-
|
|
150
|
-
API multiplateforme pour récupérer des données publiques de [canada.ca](https://www.canada.ca).
|
|
151
|
-
|
|
152
|
-
## Navigateur
|
|
153
|
-
|
|
154
|
-
```html
|
|
155
|
-
<script src="https://cdn.jsdelivr.net/npm/canada-api@5.1.4"></script>
|
|
156
|
-
```
|
|
157
|
-
|
|
158
|
-
## Node 18+
|
|
159
|
-
|
|
160
|
-
### Installation
|
|
161
|
-
|
|
162
|
-
```shell
|
|
163
|
-
npm install canada-api
|
|
164
|
-
```
|
|
165
|
-
|
|
166
|
-
### Utilisation
|
|
167
|
-
|
|
168
|
-
```js
|
|
169
|
-
import ca from 'canada-api'
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
## API
|
|
173
|
-
|
|
174
|
-
### `ca.normalize(url)`
|
|
175
|
-
|
|
176
|
-
- `url` {string|URL} - URL complète ou chemin relatif (p. ex. `'/fr/page'` ou `'https://www.canada.ca/fr/page'`)
|
|
177
|
-
- Retourne: {URL} Objet URL normalisé avec un chemin nettoyé
|
|
178
|
-
|
|
179
|
-
Valide et normalise une URL de canada.ca. Supprime le préfixe `/content/canadasite`, les extensions de fichier et les barres obliques finales.
|
|
180
|
-
|
|
181
|
-
Lève {TypeError} si `url` n'est pas une chaîne ou un objet URL.
|
|
182
|
-
Lève {Error} si l'URL n'est pas sur canada.ca ou si le chemin ne commence pas par `/en/` ou `/fr/`.
|
|
183
|
-
|
|
184
|
-
### `ca.children(url)`
|
|
185
|
-
|
|
186
|
-
- `url` {string|URL} - URL absolue ou relative
|
|
187
|
-
- Retourne: {Promise} Résout avec une réponse dont `data` est un tableau d'entrées du plan de site
|
|
188
|
-
|
|
189
|
-
Récupère et analyse le plan de site de la page donnée, retournant ses pages enfants. Les entrées sans élément `<loc>` sont ignorées.
|
|
190
|
-
|
|
191
|
-
```json
|
|
192
|
-
{
|
|
193
|
-
"data": [
|
|
194
|
-
{
|
|
195
|
-
"path": "/fr/ministere-defense-nationale/feuille-erable",
|
|
196
|
-
"lastmod": "2022-09-20T00:00:00.000Z"
|
|
197
|
-
}
|
|
198
|
-
],
|
|
199
|
-
"status": 200,
|
|
200
|
-
"statusText": "OK",
|
|
201
|
-
"headers": {
|
|
202
|
-
"content-type": "text/xml"
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
```
|
|
206
|
-
|
|
207
|
-
### `ca.content(url)`
|
|
208
|
-
|
|
209
|
-
- `url` {string|URL} - URL absolue ou relative
|
|
210
|
-
- Retourne: {Promise} Résout avec une réponse dont `data` est la chaîne HTML brute
|
|
211
|
-
|
|
212
|
-
Récupère le contenu HTML de la page.
|
|
213
|
-
|
|
214
|
-
```json
|
|
215
|
-
{
|
|
216
|
-
"data": "<!DOCTYPE html>...",
|
|
217
|
-
"status": 200,
|
|
218
|
-
"statusText": "OK",
|
|
219
|
-
"headers": {
|
|
220
|
-
"content-type": "text/html"
|
|
221
|
-
}
|
|
222
|
-
}
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
### `ca.meta(url)`
|
|
226
|
-
|
|
227
|
-
- `url` {string|URL} - URL absolue ou relative
|
|
228
|
-
- Retourne: {Promise} Résout avec une réponse dont `data` est un objet de métadonnées formaté
|
|
229
|
-
|
|
230
|
-
Récupère les métadonnées JCR de la page donnée. Les transformations suivantes sont appliquées :
|
|
231
|
-
|
|
232
|
-
- Les valeurs `"true"` / `"false"` sont converties en booléens
|
|
233
|
-
- Les propriétés `@TypeHint` sont supprimées
|
|
234
|
-
- Les tableaux vides sont supprimés
|
|
235
|
-
- Les chaînes de date sont converties en ISO 8601
|
|
236
|
-
- Les clés sont triées alphabétiquement
|
|
237
|
-
- Un champ `peer` normalisé est ajouté lorsque `gcAltLanguagePeer` est présent
|
|
238
|
-
|
|
239
|
-
```json
|
|
240
|
-
{
|
|
241
|
-
"data": {
|
|
242
|
-
"cq:lastModified": "2022-10-25T19:16:28.000Z",
|
|
243
|
-
"fluidWidth": false,
|
|
244
|
-
"peer": "/en/department-national-defence/maple-leaf"
|
|
245
|
-
},
|
|
246
|
-
"status": 200,
|
|
247
|
-
"statusText": "OK",
|
|
248
|
-
"headers": {
|
|
249
|
-
"content-type": "application/json"
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
```
|
|
253
|
-
|
|
254
|
-
### `ca.request`
|
|
255
|
-
|
|
256
|
-
- `url` {string|URL} - URL absolue ou relative
|
|
257
|
-
- `options` {RequestInit} - Options fetch optionnelles
|
|
258
|
-
- Retourne: {Promise} Résout avec un objet réponse
|
|
259
|
-
|
|
260
|
-
Client HTTP brut avec `https://www.canada.ca` comme URL de base. Utilisez-le pour toute requête non couverte par les méthodes ci-dessus. Aucune transformation d'URL n'est appliquée. Les corps de réponse avec un type de contenu `application/json` sont automatiquement analysés.
|
|
261
|
-
|
|
262
|
-
```js
|
|
263
|
-
const response = await ca.request('/fr/ministere-defense-nationale.html');
|
|
264
|
-
```
|
|
265
|
-
|
|
266
|
-
Toutes les méthodes retournent la même structure de réponse :
|
|
267
|
-
|
|
268
|
-
```json
|
|
269
|
-
{
|
|
270
|
-
"data": "...",
|
|
271
|
-
"status": 200,
|
|
272
|
-
"statusText": "OK",
|
|
273
|
-
"headers": {
|
|
274
|
-
"content-type": "text/html"
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
```
|
|
1
|
+
([Français](#canada-api-1))
|
|
2
|
+
|
|
3
|
+
# canada-api
|
|
4
|
+
|
|
5
|
+
[](https://www.npmjs.com/package/canada-api) [](https://github.com/dnd-mdn/canada-api/blob/main/LICENSE.md)
|
|
6
|
+
|
|
7
|
+
Cross platform API for fetching public data from [canada.ca](https://www.canada.ca).
|
|
8
|
+
|
|
9
|
+
## Browser
|
|
10
|
+
|
|
11
|
+
```html
|
|
12
|
+
<script src="https://cdn.jsdelivr.net/npm/canada-api@5.1.5"></script>
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Node 18+
|
|
16
|
+
|
|
17
|
+
### Install
|
|
18
|
+
|
|
19
|
+
```shell
|
|
20
|
+
npm install canada-api
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Usage
|
|
24
|
+
|
|
25
|
+
```js
|
|
26
|
+
import ca from 'canada-api'
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Testing
|
|
30
|
+
|
|
31
|
+
```shell
|
|
32
|
+
npm test
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Tests use the built-in Node.js test runner (`node:test`) and require Node 18 or later.
|
|
36
|
+
|
|
37
|
+
## API
|
|
38
|
+
|
|
39
|
+
### `ca.normalize(url)`
|
|
40
|
+
|
|
41
|
+
- `url` {string|URL} - Full URL or relative path (e.g. `'/en/page'` or `'https://www.canada.ca/en/page'`)
|
|
42
|
+
- Returns: {URL} Normalized URL object with cleaned pathname
|
|
43
|
+
|
|
44
|
+
Validates and normalizes a canada.ca URL. Strips the `/content/canadasite` prefix, file extensions, and trailing slashes.
|
|
45
|
+
|
|
46
|
+
Throws {TypeError} if `url` is not a string or URL object.
|
|
47
|
+
Throws {Error} if the URL is not on canada.ca or the path does not start with `/en/` or `/fr/`.
|
|
48
|
+
|
|
49
|
+
### `ca.children(url)`
|
|
50
|
+
|
|
51
|
+
- `url` {string|URL} - Absolute or relative URL
|
|
52
|
+
- Returns: {Promise} Fulfills with a response whose `data` is an array of sitemap entries
|
|
53
|
+
|
|
54
|
+
Fetches and parses the sitemap for the given page, returning its child pages. Entries without a `<loc>` element are skipped.
|
|
55
|
+
|
|
56
|
+
```json
|
|
57
|
+
{
|
|
58
|
+
"data": [
|
|
59
|
+
{
|
|
60
|
+
"path": "/en/department-national-defence/maple-leaf",
|
|
61
|
+
"lastmod": "2022-09-20T00:00:00.000Z"
|
|
62
|
+
}
|
|
63
|
+
],
|
|
64
|
+
"status": 200,
|
|
65
|
+
"statusText": "OK",
|
|
66
|
+
"headers": {
|
|
67
|
+
"content-type": "text/xml"
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### `ca.content(url)`
|
|
73
|
+
|
|
74
|
+
- `url` {string|URL} - Absolute or relative URL
|
|
75
|
+
- Returns: {Promise} Fulfills with a response whose `data` is the raw HTML string
|
|
76
|
+
|
|
77
|
+
Retrieves the HTML content of the page.
|
|
78
|
+
|
|
79
|
+
```json
|
|
80
|
+
{
|
|
81
|
+
"data": "<!DOCTYPE html>...",
|
|
82
|
+
"status": 200,
|
|
83
|
+
"statusText": "OK",
|
|
84
|
+
"headers": {
|
|
85
|
+
"content-type": "text/html"
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### `ca.meta(url)`
|
|
91
|
+
|
|
92
|
+
- `url` {string|URL} - Absolute or relative URL
|
|
93
|
+
- Returns: {Promise} Fulfills with a response whose `data` is a formatted metadata object
|
|
94
|
+
|
|
95
|
+
Fetches JCR metadata for the given page. The following transformations are applied:
|
|
96
|
+
|
|
97
|
+
- String `"true"` / `"false"` values are converted to booleans
|
|
98
|
+
- `@TypeHint` properties are removed
|
|
99
|
+
- Empty arrays are removed
|
|
100
|
+
- Date strings are converted to ISO 8601
|
|
101
|
+
- Keys are sorted alphabetically
|
|
102
|
+
- A normalized `peer` field is added when `gcAltLanguagePeer` is present
|
|
103
|
+
|
|
104
|
+
```json
|
|
105
|
+
{
|
|
106
|
+
"data": {
|
|
107
|
+
"cq:lastModified": "2022-10-25T19:16:28.000Z",
|
|
108
|
+
"fluidWidth": false,
|
|
109
|
+
"peer": "/fr/ministere-defense-nationale/feuille-erable"
|
|
110
|
+
},
|
|
111
|
+
"status": 200,
|
|
112
|
+
"statusText": "OK",
|
|
113
|
+
"headers": {
|
|
114
|
+
"content-type": "application/json"
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### `ca.request`
|
|
120
|
+
|
|
121
|
+
- `url` {string|URL} - Absolute or relative URL
|
|
122
|
+
- `options` {RequestInit} - Optional fetch options
|
|
123
|
+
- Returns: {Promise} Fulfills with a response object
|
|
124
|
+
|
|
125
|
+
Raw HTTP client with `https://www.canada.ca` as the base URL. Use this for any requests not covered by the methods above. No URL transformation is applied. Response bodies with a `application/json` content type are automatically parsed.
|
|
126
|
+
|
|
127
|
+
```js
|
|
128
|
+
const response = await ca.request('/en/department-national-defence.html');
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
All methods return the same response shape:
|
|
132
|
+
|
|
133
|
+
```json
|
|
134
|
+
{
|
|
135
|
+
"data": "...",
|
|
136
|
+
"status": 200,
|
|
137
|
+
"statusText": "OK",
|
|
138
|
+
"headers": {
|
|
139
|
+
"content-type": "text/html"
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
# canada-api
|
|
147
|
+
|
|
148
|
+
[](https://www.npmjs.com/package/canada-api) [](https://github.com/dnd-mdn/canada-api/blob/main/LICENSE.md)
|
|
149
|
+
|
|
150
|
+
API multiplateforme pour récupérer des données publiques de [canada.ca](https://www.canada.ca).
|
|
151
|
+
|
|
152
|
+
## Navigateur
|
|
153
|
+
|
|
154
|
+
```html
|
|
155
|
+
<script src="https://cdn.jsdelivr.net/npm/canada-api@5.1.4"></script>
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Node 18+
|
|
159
|
+
|
|
160
|
+
### Installation
|
|
161
|
+
|
|
162
|
+
```shell
|
|
163
|
+
npm install canada-api
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Utilisation
|
|
167
|
+
|
|
168
|
+
```js
|
|
169
|
+
import ca from 'canada-api'
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## API
|
|
173
|
+
|
|
174
|
+
### `ca.normalize(url)`
|
|
175
|
+
|
|
176
|
+
- `url` {string|URL} - URL complète ou chemin relatif (p. ex. `'/fr/page'` ou `'https://www.canada.ca/fr/page'`)
|
|
177
|
+
- Retourne: {URL} Objet URL normalisé avec un chemin nettoyé
|
|
178
|
+
|
|
179
|
+
Valide et normalise une URL de canada.ca. Supprime le préfixe `/content/canadasite`, les extensions de fichier et les barres obliques finales.
|
|
180
|
+
|
|
181
|
+
Lève {TypeError} si `url` n'est pas une chaîne ou un objet URL.
|
|
182
|
+
Lève {Error} si l'URL n'est pas sur canada.ca ou si le chemin ne commence pas par `/en/` ou `/fr/`.
|
|
183
|
+
|
|
184
|
+
### `ca.children(url)`
|
|
185
|
+
|
|
186
|
+
- `url` {string|URL} - URL absolue ou relative
|
|
187
|
+
- Retourne: {Promise} Résout avec une réponse dont `data` est un tableau d'entrées du plan de site
|
|
188
|
+
|
|
189
|
+
Récupère et analyse le plan de site de la page donnée, retournant ses pages enfants. Les entrées sans élément `<loc>` sont ignorées.
|
|
190
|
+
|
|
191
|
+
```json
|
|
192
|
+
{
|
|
193
|
+
"data": [
|
|
194
|
+
{
|
|
195
|
+
"path": "/fr/ministere-defense-nationale/feuille-erable",
|
|
196
|
+
"lastmod": "2022-09-20T00:00:00.000Z"
|
|
197
|
+
}
|
|
198
|
+
],
|
|
199
|
+
"status": 200,
|
|
200
|
+
"statusText": "OK",
|
|
201
|
+
"headers": {
|
|
202
|
+
"content-type": "text/xml"
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### `ca.content(url)`
|
|
208
|
+
|
|
209
|
+
- `url` {string|URL} - URL absolue ou relative
|
|
210
|
+
- Retourne: {Promise} Résout avec une réponse dont `data` est la chaîne HTML brute
|
|
211
|
+
|
|
212
|
+
Récupère le contenu HTML de la page.
|
|
213
|
+
|
|
214
|
+
```json
|
|
215
|
+
{
|
|
216
|
+
"data": "<!DOCTYPE html>...",
|
|
217
|
+
"status": 200,
|
|
218
|
+
"statusText": "OK",
|
|
219
|
+
"headers": {
|
|
220
|
+
"content-type": "text/html"
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### `ca.meta(url)`
|
|
226
|
+
|
|
227
|
+
- `url` {string|URL} - URL absolue ou relative
|
|
228
|
+
- Retourne: {Promise} Résout avec une réponse dont `data` est un objet de métadonnées formaté
|
|
229
|
+
|
|
230
|
+
Récupère les métadonnées JCR de la page donnée. Les transformations suivantes sont appliquées :
|
|
231
|
+
|
|
232
|
+
- Les valeurs `"true"` / `"false"` sont converties en booléens
|
|
233
|
+
- Les propriétés `@TypeHint` sont supprimées
|
|
234
|
+
- Les tableaux vides sont supprimés
|
|
235
|
+
- Les chaînes de date sont converties en ISO 8601
|
|
236
|
+
- Les clés sont triées alphabétiquement
|
|
237
|
+
- Un champ `peer` normalisé est ajouté lorsque `gcAltLanguagePeer` est présent
|
|
238
|
+
|
|
239
|
+
```json
|
|
240
|
+
{
|
|
241
|
+
"data": {
|
|
242
|
+
"cq:lastModified": "2022-10-25T19:16:28.000Z",
|
|
243
|
+
"fluidWidth": false,
|
|
244
|
+
"peer": "/en/department-national-defence/maple-leaf"
|
|
245
|
+
},
|
|
246
|
+
"status": 200,
|
|
247
|
+
"statusText": "OK",
|
|
248
|
+
"headers": {
|
|
249
|
+
"content-type": "application/json"
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### `ca.request`
|
|
255
|
+
|
|
256
|
+
- `url` {string|URL} - URL absolue ou relative
|
|
257
|
+
- `options` {RequestInit} - Options fetch optionnelles
|
|
258
|
+
- Retourne: {Promise} Résout avec un objet réponse
|
|
259
|
+
|
|
260
|
+
Client HTTP brut avec `https://www.canada.ca` comme URL de base. Utilisez-le pour toute requête non couverte par les méthodes ci-dessus. Aucune transformation d'URL n'est appliquée. Les corps de réponse avec un type de contenu `application/json` sont automatiquement analysés.
|
|
261
|
+
|
|
262
|
+
```js
|
|
263
|
+
const response = await ca.request('/fr/ministere-defense-nationale.html');
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
Toutes les méthodes retournent la même structure de réponse :
|
|
267
|
+
|
|
268
|
+
```json
|
|
269
|
+
{
|
|
270
|
+
"data": "...",
|
|
271
|
+
"status": 200,
|
|
272
|
+
"statusText": "OK",
|
|
273
|
+
"headers": {
|
|
274
|
+
"content-type": "text/html"
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
```
|
package/dist/ca.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define("ca",[],e):"object"==typeof exports?exports.ca=e():t.ca=e()}(Object("undefined"!=typeof self?self:this),()=>(()=>{"use strict";var t={d:(e,a)=>{for(var r in a)t.o(a,r)&&!t.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:a[r]})},o:(t,e)=>Object.prototype.hasOwnProperty.call(t,e)},e={};t.d(e,{default:()=>c});const a="https://www.canada.ca",r=t=>{if("string"==typeof t)t=new URL(t,a);else{if(!(t instanceof URL))throw new TypeError("string or URL object expected");t=new URL(t.href)}if(t.origin!==a)throw new Error("URL must start with "+a);if(t.pathname=t.pathname.replace(/^\/content\/canadasite/,""),t.pathname=t.pathname.replace(/\.[^/]*$/,"").replace(/\/+$/,""),!t.pathname.startsWith("/en/")&&!t.pathname.startsWith("/fr/"))throw new Error(`Invalid path: "${t.pathname}" must start with /en/ or /fr/`);return t},n=async(t,e={})=>{
|
|
1
|
+
!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define("ca",[],e):"object"==typeof exports?exports.ca=e():t.ca=e()}(Object("undefined"!=typeof self?self:this),()=>(()=>{"use strict";var t={d:(e,a)=>{for(var r in a)t.o(a,r)&&!t.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:a[r]})},o:(t,e)=>Object.prototype.hasOwnProperty.call(t,e)},e={};t.d(e,{default:()=>c});const a="https://www.canada.ca",r=t=>{if("string"==typeof t)t=new URL(t,a);else{if(!(t instanceof URL))throw new TypeError("string or URL object expected");t=new URL(t.href)}if(t.origin!==a)throw new Error("URL must start with "+a);if(t.pathname=t.pathname.replace(/^\/content\/canadasite/,""),t.pathname=t.pathname.replace(/\.[^/]*$/,"").replace(/\/+$/,""),!t.pathname.startsWith("/en/")&&!t.pathname.startsWith("/fr/"))throw new Error(`Invalid path: "${t.pathname}" must start with /en/ or /fr/`);return t},n=async(t,e={})=>{t=new URL(t,a);const{headers:r={},...n}=e;let o;try{o=await fetch(t,{signal:AbortSignal.timeout(3e4),...n,headers:{"User-Agent":"canada-api/5.1.5",Accept:"*/*",...r}})}catch(e){throw e.url=t.toString(),e}if(!o.ok){const e=new Error(`${o.status} ${o.statusText}`);throw e.url=t.toString(),e}let s=await o.text();const c=o.headers.get("content-type")?.includes("application/json");if(c)try{s=JSON.parse(s)}catch(e){throw e.url=t.toString(),e}return{data:s,status:o.status,statusText:o.statusText,headers:Object.fromEntries(o.headers)}},o={Jan:"01",Feb:"02",Mar:"03",Apr:"04",May:"05",Jun:"06",Jul:"07",Aug:"08",Sep:"09",Oct:"10",Nov:"11",Dec:"12"};function s(t){if(/^\d{4}-\d{2}-\d{2}$/.test(t))return new Date(t).toISOString();let e=/^\w{3} (\w{3}) (\d{2}) (\d{4}) ([\d:]{8}) GMT([\-+]\d{4})$/.exec(t);return e?new Date(`${e[3]}-${o[e[1]]}-${e[2]}T${e[4]}${e[5]}`).toISOString():t}const c={normalize:r,request:n,children:async t=>{const e=r(t);e.pathname+=".sitemap.xml",e.searchParams.set("_",Date.now());const a=await 
n(e,{redirect:"error"});return a.data=[...a.data.matchAll(/<url>([\s\S]*?)<\/url>/g)].map(([,t])=>{const e=t.match(/<loc>([\s\S]*?)<\/loc>/)?.[1],a=t.match(/<lastmod>([\s\S]*?)<\/lastmod>/)?.[1];return{loc:e,lastmod:a}}).filter(t=>t.loc).map(t=>({path:r(t.loc).pathname,lastmod:t.lastmod?new Date(t.lastmod).toISOString():null})),a},content:async t=>{const e=r(t);return e.pathname+=".html",e.searchParams.set("_",Date.now()),n(e,{signal:AbortSignal.timeout(1e4),redirect:"error"})},meta:async t=>{const e=r(t);e.pathname+="/_jcr_content.json",e.searchParams.set("_",Date.now());const a=await n(e,{signal:AbortSignal.timeout(1e4),redirect:"error"});return a.data=(t=>{const e={};for(const[a,n]of Object.entries(t))a.endsWith("@TypeHint")||Array.isArray(n)&&0===n.length||("true"===n?e[a]=!0:"false"===n?e[a]=!1:"gcAltLanguagePeer"===a?(e[a]=n,e.peer=r(n).pathname):e[a]="string"==typeof n?s(n.trim()):n);return Object.keys(e).sort().reduce((t,a)=>(t[a]=e[a],t),{})})(a.data),a}};return e.default})());
|
package/package.json
CHANGED
|
@@ -1,44 +1,44 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "canada-api",
|
|
3
|
-
"version": "5.1.4",
|
|
4
|
-
"description": "Cross platform API to fetch data from canada.ca",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"main": "src/index.js",
|
|
7
|
-
"browser": "dist/ca.js",
|
|
8
|
-
"exports": {
|
|
9
|
-
".": {
|
|
10
|
-
"browser": "./dist/ca.js",
|
|
11
|
-
"default": "./src/index.js"
|
|
12
|
-
}
|
|
13
|
-
},
|
|
14
|
-
"files": [
|
|
15
|
-
"src",
|
|
16
|
-
"dist"
|
|
17
|
-
],
|
|
18
|
-
"scripts": {
|
|
19
|
-
"test": "node --test tests/*.test.js",
|
|
20
|
-
"test:integration": "node --test tests/integration/*.js",
|
|
21
|
-
"build": "webpack",
|
|
22
|
-
"dev": "webpack --mode development"
|
|
23
|
-
},
|
|
24
|
-
"author": "National Defence",
|
|
25
|
-
"license": "MIT",
|
|
26
|
-
"engines": {
|
|
27
|
-
"node": ">=18"
|
|
28
|
-
},
|
|
29
|
-
"keywords": [
|
|
30
|
-
"canada",
|
|
31
|
-
"api",
|
|
32
|
-
"fetch"
|
|
33
|
-
],
|
|
34
|
-
"homepage": "https://github.com/dnd-mdn/canada-api#readme",
|
|
35
|
-
"bugs": "https://github.com/dnd-mdn/canada-api/issues",
|
|
36
|
-
"repository": {
|
|
37
|
-
"type": "git",
|
|
38
|
-
"url": "https://github.com/dnd-mdn/canada-api.git"
|
|
39
|
-
},
|
|
40
|
-
"devDependencies": {
|
|
41
|
-
"webpack": "^5.105.4",
|
|
42
|
-
"webpack-cli": "^7.0.2"
|
|
43
|
-
}
|
|
44
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "canada-api",
|
|
3
|
+
"version": "5.1.5",
|
|
4
|
+
"description": "Cross platform API to fetch data from canada.ca",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "src/index.js",
|
|
7
|
+
"browser": "dist/ca.js",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"browser": "./dist/ca.js",
|
|
11
|
+
"default": "./src/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"src",
|
|
16
|
+
"dist"
|
|
17
|
+
],
|
|
18
|
+
"scripts": {
|
|
19
|
+
"test": "node --test tests/*.test.js",
|
|
20
|
+
"test:integration": "node --test tests/integration/*.js",
|
|
21
|
+
"build": "webpack",
|
|
22
|
+
"dev": "webpack --mode development"
|
|
23
|
+
},
|
|
24
|
+
"author": "National Defence",
|
|
25
|
+
"license": "MIT",
|
|
26
|
+
"engines": {
|
|
27
|
+
"node": ">=18"
|
|
28
|
+
},
|
|
29
|
+
"keywords": [
|
|
30
|
+
"canada",
|
|
31
|
+
"api",
|
|
32
|
+
"fetch"
|
|
33
|
+
],
|
|
34
|
+
"homepage": "https://github.com/dnd-mdn/canada-api#readme",
|
|
35
|
+
"bugs": "https://github.com/dnd-mdn/canada-api/issues",
|
|
36
|
+
"repository": {
|
|
37
|
+
"type": "git",
|
|
38
|
+
"url": "https://github.com/dnd-mdn/canada-api.git"
|
|
39
|
+
},
|
|
40
|
+
"devDependencies": {
|
|
41
|
+
"webpack": "^5.105.4",
|
|
42
|
+
"webpack-cli": "^7.0.2"
|
|
43
|
+
}
|
|
44
|
+
}
|
package/src/children.js
CHANGED
|
@@ -1,49 +1,49 @@
|
|
|
1
|
-
import normalize from "./normalize.js";
|
|
2
|
-
import request from "./request.js";
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Represents a single URL entry from a sitemap
|
|
6
|
-
* @typedef {object} SitemapEntry
|
|
7
|
-
* @property {string} path - The normalized URL path (e.g., '/en/page')
|
|
8
|
-
* @property {string|null} lastmod - ISO 8601 timestamp or null if not present
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
/**
|
|
12
|
-
* Parse XML sitemap data into structured URL entries
|
|
13
|
-
* @param {string} data - Raw XML sitemap content
|
|
14
|
-
* @returns {SitemapEntry[]} Array of sitemap entries with path and lastmod. Entries missing a `<loc>` element are skipped.
|
|
15
|
-
*/
|
|
16
|
-
export const parseSitemap = (xml) => {
|
|
17
|
-
return [...xml.matchAll(/<url>([\s\S]*?)<\/url>/g)]
|
|
18
|
-
.map(([, inner]) => {
|
|
19
|
-
const loc = inner.match(/<loc>([\s\S]*?)<\/loc>/)?.[1];
|
|
20
|
-
const lastmod = inner.match(/<lastmod>([\s\S]*?)<\/lastmod>/)?.[1];
|
|
21
|
-
return { loc, lastmod };
|
|
22
|
-
})
|
|
23
|
-
.filter(item => item.loc)
|
|
24
|
-
.map(item => ({
|
|
25
|
-
path: normalize(item.loc).pathname,
|
|
26
|
-
lastmod: item.lastmod ? new Date(item.lastmod).toISOString() : null,
|
|
27
|
-
}));
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
/**
|
|
31
|
-
* Fetch and parse sitemap children for a canada.ca page
|
|
32
|
-
* @param {string|URL} url - Absolute or relative URL
|
|
33
|
-
* @returns {Promise<{data: SitemapEntry[], status: number, statusText: string, headers: object}>}
|
|
34
|
-
* @throws {Error} If the request fails or returns a non-2xx status
|
|
35
|
-
*/
|
|
36
|
-
const children = async (url) => {
|
|
37
|
-
const target = normalize(url);
|
|
38
|
-
target.pathname += '.sitemap.xml';
|
|
39
|
-
target.searchParams.set('_', Date.now());
|
|
40
|
-
|
|
41
|
-
const response = await request(target, {
|
|
42
|
-
redirect: 'error'
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
response.data = parseSitemap(response.data);
|
|
46
|
-
return response;
|
|
47
|
-
};
|
|
48
|
-
|
|
1
|
+
import normalize from "./normalize.js";
|
|
2
|
+
import request from "./request.js";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Represents a single URL entry from a sitemap
|
|
6
|
+
* @typedef {object} SitemapEntry
|
|
7
|
+
* @property {string} path - The normalized URL path (e.g., '/en/page')
|
|
8
|
+
* @property {string|null} lastmod - ISO 8601 timestamp or null if not present
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Parse XML sitemap data into structured URL entries
|
|
13
|
+
* @param {string} data - Raw XML sitemap content
|
|
14
|
+
* @returns {SitemapEntry[]} Array of sitemap entries with path and lastmod. Entries missing a `<loc>` element are skipped.
|
|
15
|
+
*/
|
|
16
|
+
export const parseSitemap = (xml) => {
|
|
17
|
+
return [...xml.matchAll(/<url>([\s\S]*?)<\/url>/g)]
|
|
18
|
+
.map(([, inner]) => {
|
|
19
|
+
const loc = inner.match(/<loc>([\s\S]*?)<\/loc>/)?.[1];
|
|
20
|
+
const lastmod = inner.match(/<lastmod>([\s\S]*?)<\/lastmod>/)?.[1];
|
|
21
|
+
return { loc, lastmod };
|
|
22
|
+
})
|
|
23
|
+
.filter(item => item.loc)
|
|
24
|
+
.map(item => ({
|
|
25
|
+
path: normalize(item.loc).pathname,
|
|
26
|
+
lastmod: item.lastmod ? new Date(item.lastmod).toISOString() : null,
|
|
27
|
+
}));
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Fetch and parse sitemap children for a canada.ca page
|
|
32
|
+
* @param {string|URL} url - Absolute or relative URL
|
|
33
|
+
* @returns {Promise<{data: SitemapEntry[], status: number, statusText: string, headers: object}>}
|
|
34
|
+
* @throws {Error} If the request fails or returns a non-2xx status
|
|
35
|
+
*/
|
|
36
|
+
const children = async (url) => {
|
|
37
|
+
const target = normalize(url);
|
|
38
|
+
target.pathname += '.sitemap.xml';
|
|
39
|
+
target.searchParams.set('_', Date.now());
|
|
40
|
+
|
|
41
|
+
const response = await request(target, {
|
|
42
|
+
redirect: 'error'
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
response.data = parseSitemap(response.data);
|
|
46
|
+
return response;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
49
|
export default children;
|
package/src/content.js
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
import normalize from "./normalize.js";
|
|
2
|
-
import request from "./request.js";
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* Fetch HTML content for a canada.ca page
|
|
6
|
-
* @param {string|URL} url - Absolute or relative URL
|
|
7
|
-
* @returns {Promise<{data: string, status: number, statusText: string, headers: object}>}
|
|
8
|
-
* @throws {Error} If the request fails or returns a non-2xx status
|
|
9
|
-
*/
|
|
10
|
-
const content = async (url) => {
|
|
11
|
-
const target = normalize(url);
|
|
12
|
-
target.pathname += '.html';
|
|
13
|
-
target.searchParams.set('_', Date.now());
|
|
14
|
-
|
|
15
|
-
return request(target, {
|
|
16
|
-
signal: AbortSignal.timeout(10000),
|
|
17
|
-
redirect: 'error'
|
|
18
|
-
});
|
|
19
|
-
};
|
|
20
|
-
|
|
1
|
+
import normalize from "./normalize.js";
|
|
2
|
+
import request from "./request.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Retrieve the HTML rendition of a canada.ca page
 * @param {string|URL} url - Absolute or relative URL
 * @returns {Promise<{data: string, status: number, statusText: string, headers: object}>}
 * @throws {Error} If the request fails or returns a non-2xx status
 */
const content = async (url) => {
    // normalize() hands back a URL object; point it at the ".html" rendition
    const pageUrl = normalize(url);
    pageUrl.pathname = `${pageUrl.pathname}.html`;
    // Timestamp query parameter (presumably a cache-buster)
    pageUrl.searchParams.set('_', Date.now());

    const fetchOptions = {
        signal: AbortSignal.timeout(10000), // abort after 10 000 ms
        redirect: 'error'                   // redirects are failures, not followed
    };

    return request(pageUrl, fetchOptions);
};
|
|
20
|
+
|
|
21
21
|
export default content;
|
package/src/index.js
CHANGED
|
@@ -1,25 +1,25 @@
|
|
|
1
|
-
import normalize from "./normalize.js";
|
|
2
|
-
import request from "./request.js";
|
|
3
|
-
import children from "./children.js";
|
|
4
|
-
import content from "./content.js";
|
|
5
|
-
import meta from "./meta.js";
|
|
6
|
-
|
|
7
|
-
/**
 * Shape of the object exported as the package's default export
 * @typedef {object} CanadaAPI
 * @property {function} normalize - Normalize and validate canada.ca URLs
 * @property {function} request - Raw HTTP client for canada.ca requests
 * @property {function} children - Fetch and parse sitemap hierarchies
 * @property {function} content - Fetch HTML content pages
 * @property {function} meta - Fetch and format JCR metadata
 */

/**
 * Public API: bundles the individual module functions under one object
 * @type {CanadaAPI}
 */
const ca = {
    normalize: normalize,
    request: request,
    children: children,
    content: content,
    meta: meta
}
|
|
24
|
-
|
|
1
|
+
import normalize from "./normalize.js";
|
|
2
|
+
import request from "./request.js";
|
|
3
|
+
import children from "./children.js";
|
|
4
|
+
import content from "./content.js";
|
|
5
|
+
import meta from "./meta.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Shape of the object exported as the package's default export
 * @typedef {object} CanadaAPI
 * @property {function} normalize - Normalize and validate canada.ca URLs
 * @property {function} request - Raw HTTP client for canada.ca requests
 * @property {function} children - Fetch and parse sitemap hierarchies
 * @property {function} content - Fetch HTML content pages
 * @property {function} meta - Fetch and format JCR metadata
 */

/**
 * Public API: bundles the individual module functions under one object
 * @type {CanadaAPI}
 */
const ca = {
    normalize: normalize,
    request: request,
    children: children,
    content: content,
    meta: meta
}
|
|
24
|
+
|
|
25
25
|
export default ca
|
package/src/meta.js
CHANGED
|
@@ -1,103 +1,103 @@
|
|
|
1
|
-
import normalize from "./normalize.js";
|
|
2
|
-
import request from "./request.js";
|
|
3
|
-
|
|
4
|
-
/**
 * Lookup table from three-letter English month abbreviation to its
 * zero-padded two-digit month number ('Jan' -> '01' ... 'Dec' -> '12')
 * @const {Record<string, string>}
 * @private
 */
const months = Object.fromEntries(
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        // Position in the list (1-based) is the month number
        .map((abbreviation, index) => [abbreviation, String(index + 1).padStart(2, '0')])
)
|
|
23
|
-
|
|
24
|
-
/**
 * Try to parse and format date strings from JCR into ISO 8601
 * @param {string} text - Potential date string to format
 * @returns {string} ISO 8601 timestamp, or the original text when it is not a
 * recognized, valid date
 * @description Supports YYYY-MM-DD and the JCR date format
 * (e.g. "Wed Nov 20 2019 13:17:13 GMT-0500"). The JCR form is rebuilt into a
 * strict ISO 8601 string before being handed to `Date`. Text that matches one of
 * the patterns but does not describe a real date (e.g. "2020-13-45" or an
 * unknown month abbreviation) is returned unchanged instead of throwing.
 * @private
 */
function formatDate(text) {
    let isoCandidate = null

    if (/^\d{4}-\d{2}-\d{2}$/.test(text)) {
        // Simple YYYY-MM-DD format
        isoCandidate = text
    } else {
        // JCR format: weekday, month name, day, year, time, "GMT" + signed HHMM offset
        const m = /^\w{3} (\w{3}) (\d{2}) (\d{4}) ([\d:]{8}) GMT([\-+])(\d{2})(\d{2})$/.exec(text)
        const month = m ? months[m[1]] : undefined
        if (month) {
            // ISO 8601 requires a colon inside the offset ("-05:00", not "-0500")
            isoCandidate = `${m[3]}-${month}-${m[2]}T${m[4]}${m[5]}${m[6]}:${m[7]}`
        }
    }

    if (isoCandidate === null) return text

    const parsed = new Date(isoCandidate)
    // toISOString() throws a RangeError on an invalid Date, so check first
    return Number.isNaN(parsed.getTime()) ? text : parsed.toISOString()
}
|
|
46
|
-
|
|
47
|
-
/**
 * Clean up a raw JCR metadata object
 * @param {Record<string, any>} data - Raw metadata object from JCR
 * @returns {Record<string, any>} Formatted metadata with normalized types and sorted keys
 * @description Drops `@TypeHint` properties and empty arrays, turns the strings
 * 'true' / 'false' into booleans, runs every other string through `formatDate`
 * after trimming it, and, when `gcAltLanguagePeer` is present, keeps it as-is
 * while adding a `peer` field holding its normalized pathname. Keys of the
 * returned object are inserted in sorted order.
 */
export const formatMeta = (data) => {
    const formatted = {}

    for (const name of Object.keys(data)) {
        const raw = data[name]

        const isEmptyArray = Array.isArray(raw) && raw.length === 0
        if (name.endsWith('@TypeHint') || isEmptyArray) continue

        // String booleans win over every other rule, including the peer rule below
        if (raw === 'true' || raw === 'false') {
            formatted[name] = raw === 'true'
            continue
        }

        if (name === 'gcAltLanguagePeer') {
            formatted[name] = raw
            // normalize() throws if the peer is not a valid canada.ca path
            formatted['peer'] = normalize(raw).pathname
            continue
        }

        formatted[name] = typeof raw === 'string' ? formatDate(raw.trim()) : raw
    }

    // Re-insert keys in sorted order for readability
    const ordered = {}
    for (const name of Object.keys(formatted).sort()) {
        ordered[name] = formatted[name]
    }
    return ordered
}
|
|
82
|
-
|
|
83
|
-
/**
 * Retrieve the JCR metadata of a canada.ca page and run it through formatMeta
 * @param {string|URL} url - Absolute or relative URL
 * @returns {Promise<{data: Record<string, any>, status: number, statusText: string, headers: object}>}
 * @throws {Error} If the request fails or returns a non-2xx status
 */
const meta = async (url) => {
    // normalize() hands back a URL object; point it at the page's JCR content node
    const jcrUrl = normalize(url);
    jcrUrl.pathname = `${jcrUrl.pathname}/_jcr_content.json`;
    // Timestamp query parameter (presumably a cache-buster)
    jcrUrl.searchParams.set('_', Date.now());

    const fetchOptions = {
        signal: AbortSignal.timeout(10000), // abort after 10 000 ms
        redirect: 'error'                   // redirects are failures, not followed
    };
    const result = await request(jcrUrl, fetchOptions);

    // Swap the raw metadata for its formatted form
    result.data = formatMeta(result.data);
    return result;
};
|
|
102
|
-
|
|
1
|
+
import normalize from "./normalize.js";
|
|
2
|
+
import request from "./request.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Lookup table from three-letter English month abbreviation to its
 * zero-padded two-digit month number ('Jan' -> '01' ... 'Dec' -> '12')
 * @const {Record<string, string>}
 * @private
 */
const months = Object.fromEntries(
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        // Position in the list (1-based) is the month number
        .map((abbreviation, index) => [abbreviation, String(index + 1).padStart(2, '0')])
)
|
|
23
|
+
|
|
24
|
+
/**
 * Try to parse and format date strings from JCR into ISO 8601
 * @param {string} text - Potential date string to format
 * @returns {string} ISO 8601 timestamp, or the original text when it is not a
 * recognized, valid date
 * @description Supports YYYY-MM-DD and the JCR date format
 * (e.g. "Wed Nov 20 2019 13:17:13 GMT-0500"). The JCR form is rebuilt into a
 * strict ISO 8601 string before being handed to `Date`. Text that matches one of
 * the patterns but does not describe a real date (e.g. "2020-13-45" or an
 * unknown month abbreviation) is returned unchanged instead of throwing.
 * @private
 */
function formatDate(text) {
    let isoCandidate = null

    if (/^\d{4}-\d{2}-\d{2}$/.test(text)) {
        // Simple YYYY-MM-DD format
        isoCandidate = text
    } else {
        // JCR format: weekday, month name, day, year, time, "GMT" + signed HHMM offset
        const m = /^\w{3} (\w{3}) (\d{2}) (\d{4}) ([\d:]{8}) GMT([\-+])(\d{2})(\d{2})$/.exec(text)
        const month = m ? months[m[1]] : undefined
        if (month) {
            // ISO 8601 requires a colon inside the offset ("-05:00", not "-0500")
            isoCandidate = `${m[3]}-${month}-${m[2]}T${m[4]}${m[5]}${m[6]}:${m[7]}`
        }
    }

    if (isoCandidate === null) return text

    const parsed = new Date(isoCandidate)
    // toISOString() throws a RangeError on an invalid Date, so check first
    return Number.isNaN(parsed.getTime()) ? text : parsed.toISOString()
}
|
|
46
|
+
|
|
47
|
+
/**
 * Clean up a raw JCR metadata object
 * @param {Record<string, any>} data - Raw metadata object from JCR
 * @returns {Record<string, any>} Formatted metadata with normalized types and sorted keys
 * @description Drops `@TypeHint` properties and empty arrays, turns the strings
 * 'true' / 'false' into booleans, runs every other string through `formatDate`
 * after trimming it, and, when `gcAltLanguagePeer` is present, keeps it as-is
 * while adding a `peer` field holding its normalized pathname. Keys of the
 * returned object are inserted in sorted order.
 */
export const formatMeta = (data) => {
    const formatted = {}

    for (const name of Object.keys(data)) {
        const raw = data[name]

        const isEmptyArray = Array.isArray(raw) && raw.length === 0
        if (name.endsWith('@TypeHint') || isEmptyArray) continue

        // String booleans win over every other rule, including the peer rule below
        if (raw === 'true' || raw === 'false') {
            formatted[name] = raw === 'true'
            continue
        }

        if (name === 'gcAltLanguagePeer') {
            formatted[name] = raw
            // normalize() throws if the peer is not a valid canada.ca path
            formatted['peer'] = normalize(raw).pathname
            continue
        }

        formatted[name] = typeof raw === 'string' ? formatDate(raw.trim()) : raw
    }

    // Re-insert keys in sorted order for readability
    const ordered = {}
    for (const name of Object.keys(formatted).sort()) {
        ordered[name] = formatted[name]
    }
    return ordered
}
|
|
82
|
+
|
|
83
|
+
/**
 * Retrieve the JCR metadata of a canada.ca page and run it through formatMeta
 * @param {string|URL} url - Absolute or relative URL
 * @returns {Promise<{data: Record<string, any>, status: number, statusText: string, headers: object}>}
 * @throws {Error} If the request fails or returns a non-2xx status
 */
const meta = async (url) => {
    // normalize() hands back a URL object; point it at the page's JCR content node
    const jcrUrl = normalize(url);
    jcrUrl.pathname = `${jcrUrl.pathname}/_jcr_content.json`;
    // Timestamp query parameter (presumably a cache-buster)
    jcrUrl.searchParams.set('_', Date.now());

    const fetchOptions = {
        signal: AbortSignal.timeout(10000), // abort after 10 000 ms
        redirect: 'error'                   // redirects are failures, not followed
    };
    const result = await request(jcrUrl, fetchOptions);

    // Swap the raw metadata for its formatted form
    result.data = formatMeta(result.data);
    return result;
};
|
|
102
|
+
|
|
103
103
|
export default meta;
|
package/src/normalize.js
CHANGED
|
@@ -1,38 +1,38 @@
|
|
|
1
|
-
import { BASE_URL } from './config.js';
|
|
2
|
-
|
|
3
|
-
/**
 * Normalize a canada.ca URL to a clean pathname
 * @param {string|URL} url - A full URL or relative path (e.g., 'https://www.canada.ca/en/page' or '/en/page')
 * @returns {URL} New URL object with cleaned pathname (a URL passed in is copied, not modified)
 * @throws {TypeError} If url is not a string or URL object
 * @throws {Error} If URL is not from canada.ca or path doesn't start with /en/ or /fr/
 * @description Strips the `/content/canadasite` prefix, trailing slashes and the
 * file extension of the last path segment. Trailing slashes are removed before
 * the extension so that '/en/page.html/' also becomes '/en/page'.
 */
const normalize = (url) => {
    let parsed

    if (typeof url === 'string') {
        // Relative paths are resolved against the canada.ca origin
        parsed = new URL(url, BASE_URL)
    } else if (url instanceof URL) {
        // Work on a copy so the caller's URL object is left untouched
        parsed = new URL(url.href)
    } else {
        throw new TypeError('string or URL object expected')
    }

    // Verify domain
    if (parsed.origin !== BASE_URL) {
        throw new Error('URL must start with ' + BASE_URL)
    }

    parsed.pathname = parsed.pathname.replace(/^\/content\/canadasite/, '')

    // Trailing slashes go first; otherwise '/en/page.html/' would keep its
    // extension. The last pass covers paths such as '/en/page/.html'.
    parsed.pathname = parsed.pathname
        .replace(/\/+$/, '')
        .replace(/\.[^/]*$/, '')
        .replace(/\/+$/, '')

    // Verify root language
    if (!parsed.pathname.startsWith('/en/') && !parsed.pathname.startsWith('/fr/')) {
        throw new Error(`Invalid path: "${parsed.pathname}" must start with /en/ or /fr/`)
    }

    return parsed
}
|
|
37
|
-
|
|
1
|
+
import { BASE_URL } from './config.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Normalize a canada.ca URL to a clean pathname
 * @param {string|URL} url - A full URL or relative path (e.g., 'https://www.canada.ca/en/page' or '/en/page')
 * @returns {URL} New URL object with cleaned pathname (a URL passed in is copied, not modified)
 * @throws {TypeError} If url is not a string or URL object
 * @throws {Error} If URL is not from canada.ca or path doesn't start with /en/ or /fr/
 * @description Strips the `/content/canadasite` prefix, trailing slashes and the
 * file extension of the last path segment. Trailing slashes are removed before
 * the extension so that '/en/page.html/' also becomes '/en/page'.
 */
const normalize = (url) => {
    let parsed

    if (typeof url === 'string') {
        // Relative paths are resolved against the canada.ca origin
        parsed = new URL(url, BASE_URL)
    } else if (url instanceof URL) {
        // Work on a copy so the caller's URL object is left untouched
        parsed = new URL(url.href)
    } else {
        throw new TypeError('string or URL object expected')
    }

    // Verify domain
    if (parsed.origin !== BASE_URL) {
        throw new Error('URL must start with ' + BASE_URL)
    }

    parsed.pathname = parsed.pathname.replace(/^\/content\/canadasite/, '')

    // Trailing slashes go first; otherwise '/en/page.html/' would keep its
    // extension. The last pass covers paths such as '/en/page/.html'.
    parsed.pathname = parsed.pathname
        .replace(/\/+$/, '')
        .replace(/\.[^/]*$/, '')
        .replace(/\/+$/, '')

    // Verify root language
    if (!parsed.pathname.startsWith('/en/') && !parsed.pathname.startsWith('/fr/')) {
        throw new Error(`Invalid path: "${parsed.pathname}" must start with /en/ or /fr/`)
    }

    return parsed
}
|
|
37
|
+
|
|
38
38
|
export default normalize;
|
package/src/request.js
CHANGED
|
@@ -1,40 +1,57 @@
|
|
|
1
|
-
import { BASE_URL } from "./config.js";
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Raw HTTP client for canada.ca
|
|
5
|
-
* @param {string|URL} url - Relative or absolute URL on canada.ca
|
|
6
|
-
* @param {RequestInit} [options] - Fetch options
|
|
7
|
-
* @returns {Promise<{data: string|object, status: number, statusText: string, headers: object}>}
|
|
8
|
-
* @throws {Error} If the request fails or returns a non-2xx status
|
|
9
|
-
*/
|
|
10
|
-
const request = async (url, options = {}) => {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
1
|
+
import { BASE_URL } from "./config.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Raw HTTP client for canada.ca
 * @param {string|URL} url - Relative or absolute URL on canada.ca (relative URLs are resolved against BASE_URL)
 * @param {RequestInit} [options] - Fetch options. `headers` may be a plain object,
 * a `Headers` instance or an array of [name, value] pairs; they override the
 * default `User-Agent` / `Accept` headers regardless of letter case. A `signal`
 * given here replaces the default 30 000 ms timeout.
 * @returns {Promise<{data: string|object, status: number, statusText: string, headers: object}>}
 * `data` is the parsed body when the response content type contains
 * `application/json`, otherwise the body text.
 * @throws {Error} If the request fails or returns a non-2xx status. Every error
 * thrown carries a `url` property; non-2xx errors also carry `status` and `statusText`.
 */
const request = async (url, options = {}) => {
    const target = new URL(url, BASE_URL);
    const href = target.toString();

    const { headers: customHeaders, ...requestOptions } = options;

    let response;
    try {
        // Merge through Headers: object spread would silently drop a Headers
        // instance and would duplicate names that differ only in case.
        const headers = new Headers({
            'User-Agent': 'canada-api/5.1.5',
            'Accept': '*/*'
        });
        for (const [name, value] of new Headers(customHeaders ?? {})) {
            headers.set(name, value);
        }

        response = await fetch(target, {
            signal: AbortSignal.timeout(30000),
            ...requestOptions,
            headers
        });
    } catch (e) {
        // Tag network / abort / invalid-header errors with the requested URL
        e.url = href;
        throw e;
    }

    if (!response.ok) {
        const error = new Error(`${response.status} ${response.statusText}`);
        error.url = href;
        // Expose the status so callers need not parse the message
        error.status = response.status;
        error.statusText = response.statusText;
        throw error;
    }

    let data = await response.text();
    const isJson = response.headers.get('content-type')?.includes('application/json');

    if (isJson) {
        try {
            data = JSON.parse(data);
        } catch (e) {
            e.url = href;
            throw e;
        }
    }

    return {
        data,
        status: response.status,
        statusText: response.statusText,
        headers: Object.fromEntries(response.headers)
    };
};
|
|
56
|
+
|
|
40
57
|
export default request;
|