canada-api 5.1.6 → 5.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/dist/ca.js +1 -1
- package/package.json +1 -1
- package/src/children.js +8 -3
- package/src/content.js +6 -3
- package/src/meta.js +8 -3
- package/src/normalize.js +11 -5
- package/src/request.js +1 -1
package/README.md
CHANGED
|
@@ -9,7 +9,7 @@ Cross platform API for fetching public data from [canada.ca](https://www.canada.
|
|
|
9
9
|
## Browser
|
|
10
10
|
|
|
11
11
|
```html
|
|
12
|
-
<script src="https://cdn.jsdelivr.net/npm/canada-api@5.1.6"></script>
|
|
12
|
+
<script src="https://cdn.jsdelivr.net/npm/canada-api@5.1.7"></script>
|
|
13
13
|
```
|
|
14
14
|
|
|
15
15
|
## Node 18+
|
|
@@ -72,9 +72,9 @@ Fetches and parses the sitemap for the given page, returning its child pages. En
|
|
|
72
72
|
### `ca.content(url)`
|
|
73
73
|
|
|
74
74
|
- `url` {string|URL} - Absolute or relative URL
|
|
75
|
-
- Returns: {Promise} Fulfills with a response whose `data` is the raw HTML string
|
|
75
|
+
- Returns: {Promise} Fulfills with a response whose `data` is the raw HTML string, or parsed JSON for DAM asset URLs
|
|
76
76
|
|
|
77
|
-
Retrieves the HTML content of the page.
|
|
77
|
+
Retrieves the HTML content of the page. DAM asset URLs under `/content/dam/` are passed through without forcing a `.html` suffix.
|
|
78
78
|
|
|
79
79
|
```json
|
|
80
80
|
{
|
|
@@ -90,9 +90,9 @@ Retrieves the HTML content of the page.
|
|
|
90
90
|
### `ca.meta(url)`
|
|
91
91
|
|
|
92
92
|
- `url` {string|URL} - Absolute or relative URL
|
|
93
|
-
- Returns: {Promise} Fulfills with a response whose `data` is a formatted metadata object
|
|
93
|
+
- Returns: {Promise} Fulfills with a response whose `data` is a formatted metadata object, or parsed JSON for DAM asset URLs
|
|
94
94
|
|
|
95
|
-
Fetches JCR metadata for the given page. The following transformations are applied:
|
|
95
|
+
Fetches JCR metadata for the given page. For DAM asset URLs under `/content/dam/`, the asset JSON response is returned. The following transformations are applied to page metadata:
|
|
96
96
|
|
|
97
97
|
- String `"true"` / `"false"` values are converted to booleans
|
|
98
98
|
- `@TypeHint` properties are removed
|
|
@@ -152,7 +152,7 @@ API multiplateforme pour récupérer des données publiques de [canada.ca](https
|
|
|
152
152
|
## Navigateur
|
|
153
153
|
|
|
154
154
|
```html
|
|
155
|
-
<script src="https://cdn.jsdelivr.net/npm/canada-api@5.1.6"></script>
|
|
155
|
+
<script src="https://cdn.jsdelivr.net/npm/canada-api@5.1.7"></script>
|
|
156
156
|
```
|
|
157
157
|
|
|
158
158
|
## Node 18+
|
|
@@ -207,9 +207,9 @@ Récupère et analyse le plan de site de la page donnée, retournant ses pages e
|
|
|
207
207
|
### `ca.content(url)`
|
|
208
208
|
|
|
209
209
|
- `url` {string|URL} - URL absolue ou relative
|
|
210
|
-
- Retourne: {Promise} Résout avec une réponse dont `data` est la chaîne HTML brute
|
|
210
|
+
- Retourne: {Promise} Résout avec une réponse dont `data` est la chaîne HTML brute, ou le JSON analysé pour les URL d'actifs DAM
|
|
211
211
|
|
|
212
|
-
Récupère le contenu HTML de la page.
|
|
212
|
+
Récupère le contenu HTML de la page. Les URL d'actifs DAM sous `/content/dam/` sont transmises sans forcer le suffixe `.html`.
|
|
213
213
|
|
|
214
214
|
```json
|
|
215
215
|
{
|
|
@@ -225,9 +225,9 @@ Récupère le contenu HTML de la page.
|
|
|
225
225
|
### `ca.meta(url)`
|
|
226
226
|
|
|
227
227
|
- `url` {string|URL} - URL absolue ou relative
|
|
228
|
-
- Retourne: {Promise} Résout avec une réponse dont `data` est un objet de métadonnées
|
|
228
|
+
- Retourne: {Promise} Résout avec une réponse dont `data` est un objet de métadonnées formaté, ou le JSON analysé pour les URL d'actifs DAM
|
|
229
229
|
|
|
230
|
-
Récupère les métadonnées JCR de la page donnée. Les transformations suivantes sont appliquées :
|
|
230
|
+
Récupère les métadonnées JCR de la page donnée. Pour les URL d'actifs DAM sous `/content/dam/`, la réponse JSON de l'actif est retournée. Les transformations suivantes sont appliquées aux métadonnées de page :
|
|
231
231
|
|
|
232
232
|
- Les valeurs `"true"` / `"false"` sont converties en booléens
|
|
233
233
|
- Les propriétés `@TypeHint` sont supprimées
|
package/dist/ca.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define("ca",[],e):"object"==typeof exports?exports.ca=e():t.ca=e()}(Object("undefined"!=typeof self?self:this),()=>(()=>{"use strict";var t={d:(e,
|
|
1
|
+
!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define("ca",[],e):"object"==typeof exports?exports.ca=e():t.ca=e()}(Object("undefined"!=typeof self?self:this),()=>(()=>{"use strict";var t={d:(e,a)=>{for(var r in a)t.o(a,r)&&!t.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:a[r]})},o:(t,e)=>Object.prototype.hasOwnProperty.call(t,e)},e={};t.d(e,{default:()=>c});const a="https://www.canada.ca",r=t=>{if("string"==typeof t)t=new URL(t,a);else{if(!(t instanceof URL))throw new TypeError("string or URL object expected");t=new URL(t.href)}if(t.origin!==a)throw new Error("URL must start with "+a);if(t.pathname.startsWith("/content/dam/"))return t;if(t.pathname=t.pathname.replace(/^\/content\/canadasite/,""),t.pathname=t.pathname.replace(/\/+$/,""),t.pathname=t.pathname.replace(/\.[^/]*$/,""),!t.pathname.startsWith("/en/")&&!t.pathname.startsWith("/fr/"))throw new Error(`Invalid path: "${t.pathname}" must start with /en/ or /fr/`);return t},n=async(t,e={})=>{(t=new URL(t,a)).searchParams.set("_",Date.now());const{headers:r={},...n}=e;let o;try{o=await fetch(t,{signal:AbortSignal.timeout(3e4),...n,headers:{"User-Agent":"canada-api/5.1.7",Accept:"*/*",...r}})}catch(e){throw e.url=t.toString(),e}if(!o.ok){const e=new Error(`Request to ${t} failed: ${o.status} ${o.statusText}`);throw e.url=t.toString(),e}let s=await o.text();const c=o.headers.get("content-type")?.includes("application/json");if(c)try{s=JSON.parse(s)}catch(e){const a=new Error(`Failed to parse JSON response from ${t}: ${e.message}`);throw a.url=t.toString(),a}return{data:s,status:o.status,statusText:o.statusText,headers:Object.fromEntries(o.headers)}},o={Jan:"01",Feb:"02",Mar:"03",Apr:"04",May:"05",Jun:"06",Jul:"07",Aug:"08",Sep:"09",Oct:"10",Nov:"11",Dec:"12"};function s(t){if(/^\d{4}-\d{2}-\d{2}$/.test(t))return new Date(t).toISOString();let e=/^\w{3} (\w{3}) (\d{2}) (\d{4}) ([\d:]{8}) GMT([\-+]\d{4})$/.exec(t);return e?new 
Date(`${e[3]}-${o[e[1]]}-${e[2]}T${e[4]}${e[5]}`).toISOString():t}const c={normalize:r,request:n,children:async t=>{const e=r(t);if(e.pathname.startsWith("/content/dam/"))throw new Error(`children not available for DAM assets: "${e.pathname}"`);e.pathname+=".sitemap.xml";const a=await n(e,{redirect:"error"});return a.data=[...a.data.matchAll(/<url>([\s\S]*?)<\/url>/g)].map(([,t])=>{const e=t.match(/<loc>([\s\S]*?)<\/loc>/)?.[1],a=t.match(/<lastmod>([\s\S]*?)<\/lastmod>/)?.[1];return{loc:e,lastmod:a}}).filter(t=>t.loc).map(t=>({path:r(t.loc).pathname,lastmod:t.lastmod?new Date(t.lastmod).toISOString():null})),a},content:async t=>{const e=r(t);return e.pathname.startsWith("/content/dam/")||(e.pathname+=".html"),n(e,{redirect:"error"})},meta:async t=>{const e=r(t);e.pathname.startsWith("/content/dam/")?e.pathname+="/.json":e.pathname+="/_jcr_content.json";const a=await n(e,{redirect:"error"});return a.data=(t=>{const e={};for(const[a,n]of Object.entries(t))a.endsWith("@TypeHint")||Array.isArray(n)&&0===n.length||("true"===n?e[a]=!0:"false"===n?e[a]=!1:"gcAltLanguagePeer"===a?(e[a]=n,e.peer=r(n).pathname):e[a]="string"==typeof n?s(n.trim()):n);return Object.keys(e).sort().reduce((t,a)=>(t[a]=e[a],t),{})})(a.data),a}};return e.default})());
|
package/package.json
CHANGED
package/src/children.js
CHANGED
|
@@ -10,7 +10,7 @@ import request from "./request.js";
|
|
|
10
10
|
|
|
11
11
|
/**
|
|
12
12
|
* Parse XML sitemap data into structured URL entries
|
|
13
|
-
* @param {string}
|
|
13
|
+
* @param {string} xml - Raw XML sitemap content
|
|
14
14
|
* @returns {SitemapEntry[]} Array of sitemap entries with path and lastmod. Entries missing a `<loc>` element are skipped.
|
|
15
15
|
*/
|
|
16
16
|
export const parseSitemap = (xml) => {
|
|
@@ -29,12 +29,17 @@ export const parseSitemap = (xml) => {
|
|
|
29
29
|
|
|
30
30
|
/**
|
|
31
31
|
* Fetch and parse sitemap children for a canada.ca page
|
|
32
|
-
* @param {string|URL} url - Absolute or relative URL
|
|
32
|
+
* @param {string|URL} url - Absolute or relative URL for a canada.ca page
|
|
33
33
|
* @returns {Promise<{data: SitemapEntry[], status: number, statusText: string, headers: object}>}
|
|
34
|
-
* @throws {Error} If the
|
|
34
|
+
* @throws {Error} If the URL points to a DAM asset path or if the request fails/returns a non-2xx status
|
|
35
35
|
*/
|
|
36
36
|
const children = async (url) => {
|
|
37
37
|
const target = normalize(url);
|
|
38
|
+
|
|
39
|
+
if (target.pathname.startsWith('/content/dam/')) {
|
|
40
|
+
throw new Error(`children not available for DAM assets: "${target.pathname}"`);
|
|
41
|
+
}
|
|
42
|
+
|
|
38
43
|
target.pathname += '.sitemap.xml';
|
|
39
44
|
|
|
40
45
|
const response = await request(target, {
|
package/src/content.js
CHANGED
|
@@ -2,14 +2,17 @@ import normalize from "./normalize.js";
|
|
|
2
2
|
import request from "./request.js";
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
|
-
* Fetch
|
|
5
|
+
* Fetch content for a canada.ca page or DAM asset
|
|
6
6
|
* @param {string|URL} url - Absolute or relative URL
|
|
7
|
-
* @returns {Promise<{data: string, status: number, statusText: string, headers: object}>}
|
|
7
|
+
* @returns {Promise<{data: string|object, status: number, statusText: string, headers: object}>}
|
|
8
8
|
* @throws {Error} If the request fails or returns a non-2xx status
|
|
9
9
|
*/
|
|
10
10
|
const content = async (url) => {
|
|
11
11
|
const target = normalize(url);
|
|
12
|
-
|
|
12
|
+
|
|
13
|
+
if (!target.pathname.startsWith('/content/dam/')) {
|
|
14
|
+
target.pathname += '.html';
|
|
15
|
+
}
|
|
13
16
|
|
|
14
17
|
return request(target, {
|
|
15
18
|
redirect: 'error'
|
package/src/meta.js
CHANGED
|
@@ -81,14 +81,19 @@ export const formatMeta = (data) => {
|
|
|
81
81
|
}
|
|
82
82
|
|
|
83
83
|
/**
|
|
84
|
-
* Fetch and format
|
|
84
|
+
* Fetch and format metadata
|
|
85
85
|
* @param {string|URL} url - Absolute or relative URL
|
|
86
|
-
* @returns {Promise<{data: Record<string, any
|
|
86
|
+
* @returns {Promise<{data: Record<string, any>|any, status: number, statusText: string, headers: object}>}
|
|
87
87
|
* @throws {Error} If the request fails or returns a non-2xx status
|
|
88
88
|
*/
|
|
89
89
|
const meta = async (url) => {
|
|
90
90
|
const target = normalize(url);
|
|
91
|
-
|
|
91
|
+
|
|
92
|
+
if (target.pathname.startsWith('/content/dam/')) {
|
|
93
|
+
target.pathname += '/.json'
|
|
94
|
+
} else {
|
|
95
|
+
target.pathname += '/_jcr_content.json';
|
|
96
|
+
}
|
|
92
97
|
|
|
93
98
|
const response = await request(target, {
|
|
94
99
|
redirect: 'error'
|
package/src/normalize.js
CHANGED
|
@@ -2,10 +2,10 @@ import { BASE_URL } from './config.js';
|
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Normalize a canada.ca URL to a clean pathname
|
|
5
|
-
* @param {string|URL} url - A full URL or relative path (e.g., 'https://www.canada.ca/en/page'
|
|
6
|
-
* @returns {URL} Normalized URL object with cleaned pathname
|
|
5
|
+
* @param {string|URL} url - A full URL or relative path (e.g., 'https://www.canada.ca/en/page', '/en/page', or '/content/dam/...')
|
|
6
|
+
* @returns {URL} Normalized URL object with a cleaned pathname
|
|
7
7
|
* @throws {TypeError} If url is not a string or URL object
|
|
8
|
-
* @throws {Error} If URL is not from canada.ca or path doesn't start with /en/ or /fr/
|
|
8
|
+
* @throws {Error} If URL is not from canada.ca or the path doesn't start with /en/, /fr/, or /content/dam/
|
|
9
9
|
*/
|
|
10
10
|
const normalize = (url) => {
|
|
11
11
|
|
|
@@ -22,10 +22,16 @@ const normalize = (url) => {
|
|
|
22
22
|
throw new Error('URL must start with ' + BASE_URL)
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
+
// No normalization for DAM asset paths
|
|
26
|
+
if (url.pathname.startsWith('/content/dam/')) {
|
|
27
|
+
return url
|
|
28
|
+
}
|
|
29
|
+
|
|
25
30
|
url.pathname = url.pathname.replace(/^\/content\/canadasite/, '');
|
|
31
|
+
url.pathname = url.pathname.replace(/\/+$/, '');
|
|
26
32
|
|
|
27
|
-
// Remove file extensions (like .html, .xml)
|
|
28
|
-
url.pathname = url.pathname.replace(/\.[^/]*$/, '')
|
|
33
|
+
// Remove file extensions (like .html, .xml)
|
|
34
|
+
url.pathname = url.pathname.replace(/\.[^/]*$/, '');
|
|
29
35
|
|
|
30
36
|
// Verify root language
|
|
31
37
|
if (!url.pathname.startsWith('/en/') && !url.pathname.startsWith('/fr/')) {
|
package/src/request.js
CHANGED