@data-fair/catalog-csw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # <img alt="Data FAIR logo" src="https://cdn.jsdelivr.net/gh/data-fair/data-fair@master/ui/public/assets/logo.svg" width="40"> @data-fair/catalog-csw
2
+
3
+ A simple CSW plugin for the Data Fair catalogs service.
package/index.ts ADDED
@@ -0,0 +1,34 @@
1
+ import type CatalogPlugin from '@data-fair/types-catalogs'
2
+ import { configSchema, assertConfigValid, type CSWConfig } from '#types'
3
+ import { type CSWCapabilities, capabilities } from './lib/capabilities.ts'
4
+
5
/**
 * CSW catalog plugin definition.
 *
 * `list` and `getResource` load their implementation modules through dynamic `import()`
 * at call time, so those modules are not evaluated when this file is imported.
 */
const plugin: CatalogPlugin<CSWConfig, CSWCapabilities> = {
  // Normalizes the configured URL by stripping surrounding whitespace (when a URL is set).
  prepare: async (context) => {
    const { catalogConfig } = context
    if (catalogConfig.url) catalogConfig.url = catalogConfig.url.trim()
    return context
  },

  // Delegates to the GetRecords-based listing implementation.
  list: async (context) => {
    const listModule = await import('./lib/list.ts')
    return listModule.list(context)
  },

  // Delegates to the GetRecordById-based import implementation.
  getResource: async (context) => {
    const importsModule = await import('./lib/imports.ts')
    return importsModule.getResource(context)
  },

  metadata: {
    title: 'CSW',
    i18n: {
      en: { description: 'Uses CSW 2.0.2 to import datasets (GeoNetwork, ...)' },
      fr: { description: 'Utilise du CSW 2.0.2 pour importer des datasets (GeoNetwork, ...)' }
    },
    capabilities
  },

  configSchema,
  assertConfigValid
}

export default plugin
@@ -0,0 +1,16 @@
1
+ import type { Capability } from '@data-fair/types-catalogs'
2
+
3
/**
 * Capabilities declared by the CSW plugin.
 * `satisfies` checks every entry against `Capability` while keeping the literal
 * element types available to `CSWCapabilities`.
 */
export const capabilities = ['search', 'pagination', 'import'] satisfies Capability[]

/** Type of the capability list, derived from the value above. */
export type CSWCapabilities = typeof capabilities

export default capabilities
package/lib/imports.ts ADDED
@@ -0,0 +1,104 @@
1
+ import { XMLParser } from 'fast-xml-parser'
2
+ import path from 'path'
3
+ import axios from '@data-fair/lib-node/axios.js'
4
+ import type { CatalogPlugin, GetResourceContext, Resource } from '@data-fair/types-catalogs'
5
+ import type { CSWConfig } from '#types'
6
+ import { downloadFileWithProgress } from './utils/download.ts'
7
+ import { findBestDownloadUrl } from './utils/link-selection.ts'
8
+ import { getText } from './utils/common.ts'
9
+
10
// XML parser used for GetRecordById responses.
// - removeNSPrefix exposes `gmd:MD_Metadata` as `MD_Metadata`.
// - parseTagValue is disabled so that every tag value stays a verbatim string; with numeric
//   coercion a title such as "2024" becomes a number and is then ignored by the text helpers.
const parser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: '',
  removeNSPrefix: true,
  parseTagValue: false
})

/** File extension appended to the downloaded file for each known download format. */
const FORMAT_EXTENSIONS: Record<string, string> = {
  shapefile: '.zip',
  geojson: '.geojson',
  csv: '.csv',
  json: '.json',
  kml: '.kml'
}

const XML_ENTITIES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&apos;'
}

/** Escapes the XML special characters so a value can be embedded as element text. */
const escapeXml = (value: string): string =>
  String(value).replace(/[&<>"']/g, (char) => XML_ENTITIES[char] ?? char)

/**
 * Turns an arbitrary record identifier into a single, safe path segment.
 * Identifiers may contain `/`, `:` or `..`, which must not reach `path.join`.
 */
const toSafeFileName = (value: string): string => {
  const cleaned = String(value)
    .replace(/[^A-Za-z0-9._-]+/g, '_')
    .replace(/^\.+/, '')
    .slice(0, 200)
  return cleaned || 'resource'
}

/** Returns the extension found in the path part of a URL, or '' when there is no plausible one. */
const extensionFromUrl = (url: string): string => {
  let pathname: string
  try {
    pathname = new URL(url).pathname
  } catch {
    // Not an absolute URL: fall back to dropping the query string by hand.
    pathname = url.split('?')[0] ?? ''
  }
  const extension = path.extname(pathname)
  return /^\.[A-Za-z0-9]{1,10}$/.test(extension) ? extension : ''
}

/**
 * Fetches the ISO 19139 metadata of a record through CSW GetRecordById, selects the best
 * download link and downloads the file into the temporary directory.
 * @param context Catalog configuration, resource ID, temporary directory path and logger
 * @returns The downloaded resource: id, title, description, file path, format and update date
 * @throws Error when the CSW response is invalid, when no download link is usable or when the download fails
 */
export const getResource = async ({ catalogConfig, resourceId, tmpDir, log }: GetResourceContext<CSWConfig>): ReturnType<CatalogPlugin['getResource']> => {
  // CSW GetRecordById request body (requesting full ISO 19139 metadata).
  // The identifier is XML-escaped: it is external data and may contain `&` or `<`.
  const cswBody = `
    <csw:GetRecordById
      xmlns:csw="http://www.opengis.net/cat/csw/2.0.2"
      xmlns:gmd="http://www.isotc211.org/2005/gmd"
      service="CSW"
      version="2.0.2"
      outputSchema="http://www.isotc211.org/2005/gmd">
      <csw:Id>${escapeXml(resourceId)}</csw:Id>
      <csw:ElementSetName>full</csw:ElementSetName>
    </csw:GetRecordById>`

  try {
    await log.step('Fetching metadata (CSW)')
    const response = await axios.post(catalogConfig.url, cswBody, {
      headers: { 'Content-Type': 'application/xml' }
    })

    const parsed = parser.parse(response.data)

    // Handle responses with or without namespace prefixes
    const responseRoot = parsed.GetRecordByIdResponse || parsed['csw:GetRecordByIdResponse']
    if (!responseRoot) {
      throw new Error('Invalid or empty CSW response')
    }

    const metadata = responseRoot.MD_Metadata || responseRoot['gmd:MD_Metadata']
    if (!metadata) {
      throw new Error('ISO 19139 Metadata not found')
    }

    // Extract basic info using safe helpers
    const dataId = metadata.identificationInfo?.MD_DataIdentification || {}
    const title = getText(dataId.citation?.CI_Citation?.title) || resourceId
    const abstract = getText(dataId.abstract)

    // Determine the best URL for file download
    const downloadInfo = await findBestDownloadUrl(metadata, resourceId, log)
    if (!downloadInfo || !downloadInfo.url) {
      throw new Error(`No suitable download link found for ${resourceId}`)
    }

    await log.step('Downloading file')

    // File name: sanitized identifier + extension of the detected format,
    // falling back to the extension found in the URL for unknown formats.
    const extension = FORMAT_EXTENSIONS[downloadInfo.format] ?? extensionFromUrl(downloadInfo.url)
    const destPath = path.join(tmpDir, toSafeFileName(resourceId) + extension)

    await downloadFileWithProgress(downloadInfo.url, destPath, resourceId, log)

    return {
      id: resourceId,
      title,
      description: abstract,
      filePath: destPath,
      format: downloadInfo.format,
      updatedAt: getText(metadata.dateStamp?.Date || metadata.dateStamp?.DateTime) || new Date().toISOString(),
      size: 0
    } as Resource
  } catch (error: unknown) {
    // Rethrow real errors untouched (same message, original stack preserved);
    // only wrap values that are not Error instances.
    if (error instanceof Error && error.message) throw error
    throw new Error('Error fetching resource from CSW')
  }
}
package/lib/list.ts ADDED
@@ -0,0 +1,142 @@
1
+ import type { CatalogPlugin, ListContext } from '@data-fair/types-catalogs'
2
+ import type { CSWConfig } from '#types'
3
+ import type { CswRecord } from './utils/types.ts'
4
+ import { XMLParser } from 'fast-xml-parser'
5
+ import axios from '@data-fair/lib-node/axios.js'
6
+ import capabilities from './capabilities.ts'
7
+ import { asArray, getText } from './utils/common.ts'
8
+
9
type ResourceList = Awaited<ReturnType<CatalogPlugin['list']>>['results']

// XML parser used for GetRecords responses. Tag values are kept as verbatim strings
// (parseTagValue: false) so identifiers and titles are never coerced to numbers.
const parser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: '',
  removeNSPrefix: true,
  parseTagValue: false
})

/** Keywords looked up in the records' text to keep only records that mention a usable format. */
const FORMAT_KEYWORDS = ['WFS', 'JSON', 'CSV', 'ZIP']

const XML_ENTITIES: Record<string, string> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&apos;'
}

/** Escapes the XML special characters so a value can be embedded as element text. */
const escapeXml = (value: string): string =>
  String(value).replace(/[&<>"']/g, (char) => XML_ENTITIES[char] ?? char)

/** Parses a pagination parameter, falling back when it is missing, not numeric or lower than 1. */
const toPositiveInt = (value: unknown, fallback: number): number => {
  const parsed = Math.floor(Number(value))
  return Number.isFinite(parsed) && parsed >= 1 ? parsed : fallback
}

/** Builds a case-insensitive "contains" filter on the AnyText queryable (literal must already be XML-safe). */
const anyTextLike = (literal: string): string => `
  <ogc:PropertyIsLike matchCase="false" wildCard="%" singleChar="_" escapeChar="\\">
    <ogc:PropertyName>AnyText</ogc:PropertyName>
    <ogc:Literal>%${literal}%</ogc:Literal>
  </ogc:PropertyIsLike>`

/** Returns the first entry when a repeated element was parsed as an array, the value itself otherwise. */
const firstValue = (value: unknown): unknown => (Array.isArray(value) ? value[0] : value)

/**
 * Performs a CSW GetRecords request to list resources based on the provided query and pagination parameters.
 * @param config The context object containing the catalog configuration and the query parameters
 * @returns The total count of matched records, the resource summaries of the requested page and an empty path
 * @throws Error when the request or the parsing of its response fails
 */
export const list = async (config: ListContext<CSWConfig, typeof capabilities>): ReturnType<CatalogPlugin<CSWConfig>['list']> => {
  const { catalogConfig, params } = config
  const query = params?.q ? params.q.trim() : ''
  const page = toPositiveInt(params?.page, 1)
  const size = toPositiveInt(params?.size, 10)
  // CSW positions are 1-based
  const startPosition = (page - 1) * size + 1

  // Block of XML filters keeping records that mention a relevant format (WFS, GeoJSON/JSON, CSV, ZIP)
  const formatFilter = `
    <ogc:Or>${FORMAT_KEYWORDS.map(anyTextLike).join('\n')}
    </ogc:Or>`

  // The user query is XML-escaped before being embedded in the request body
  const filterBlock = query
    ? `
    <ogc:And>${anyTextLike(escapeXml(query))}
      ${formatFilter}
    </ogc:And>`
    : formatFilter

  const cswBody = `
    <csw:GetRecords
      xmlns:csw="http://www.opengis.net/cat/csw/2.0.2"
      xmlns:ogc="http://www.opengis.net/ogc"
      service="CSW"
      version="2.0.2"
      resultType="results"
      startPosition="${startPosition}"
      maxRecords="${size}"
      outputSchema="http://www.opengis.net/cat/csw/2.0.2">
      <csw:Query typeNames="csw:Record">
        <csw:ElementSetName>summary</csw:ElementSetName>
        <csw:Constraint version="1.1.0">
          <ogc:Filter>
            ${filterBlock}
          </ogc:Filter>
        </csw:Constraint>
        <ogc:SortBy xmlns:ogc="http://www.opengis.net/ogc">
          <ogc:SortProperty>
            <ogc:PropertyName>RevisionDate</ogc:PropertyName>
            <ogc:SortOrder>DESC</ogc:SortOrder>
          </ogc:SortProperty>
        </ogc:SortBy>
      </csw:Query>
    </csw:GetRecords>`

  try {
    const response = await axios.post(catalogConfig.url, cswBody, {
      headers: { 'Content-Type': 'application/xml' }
    })

    const parsed = parser.parse(response.data)
    const root = parsed.GetRecordsResponse || parsed['csw:GetRecordsResponse']
    if (!root) {
      console.error('Réponse XML invalide (pas de GetRecordsResponse)')
      return { count: 0, results: [], path: [] }
    }

    const searchResults = root.SearchResults || root['csw:SearchResults']
    if (!searchResults) {
      console.error('Pas de SearchResults')
      return { count: 0, results: [], path: [] }
    }

    // Attribute values are strings; an unreadable counter is reported as 0 rather than NaN
    const totalCount = parseInt(String(searchResults.numberOfRecordsMatched ?? '0'), 10) || 0
    const records = asArray(searchResults.SummaryRecord || searchResults.Record || []) as CswRecord[]

    const listResults = records.map((record: any) => {
      // dc:identifier / dc:title may be repeated, in which case the parser yields an array
      const identifier = getText(firstValue(record.identifier || record['dc:identifier']))
      const title = getText(firstValue(record.title || record['dc:title'])) || 'Sans titre'
      const updatedAt = getText(firstValue(record.modified || record.date || record.dateStamp || record.RevisionDate))
      const type = getText(firstValue(record.type || record['dc:type']))
      return {
        id: identifier,
        title,
        updatedAt: updatedAt || new Date().toISOString(),
        type: 'resource',
        format: type || 'unknown'
      }
    }) as ResourceList

    return {
      count: totalCount,
      results: listResults,
      path: []
    }
  } catch (error: any) {
    console.error('ERREUR :', error.message)
    if (error.response) console.error('Data:', error.response.data)
    throw new Error('Erreur lors de la recherche CSW')
  }
}
@@ -0,0 +1,12 @@
1
/**
 * Normalizes a parsed XML value to an array: arrays are returned as-is, any other
 * truthy value is wrapped in a one-element array and falsy values give an empty array.
 */
export const asArray = (input: any): any[] => {
  if (Array.isArray(input)) return input
  return input ? [input] : []
}
5
+
6
/**
 * Extracts the text of a parsed XML node and always returns a string.
 *
 * Supported inputs:
 * - strings (returned as-is) and numbers / booleans (stringified, e.g. when the XML parser coerced the value);
 * - arrays (repeated elements): the first non-empty text wins;
 * - objects: the text is looked up, in order, under `CharacterString`, `Anchor`
 *   (ISO 19139 `gmx:Anchor`, once namespace prefixes are removed) and `#text`
 *   (text of an element that also carries attributes). The lookup is recursive,
 *   so `{ CharacterString: { '#text': 'x' } }` yields `'x'`.
 *
 * @param input Any value found in the parsed XML tree
 * @returns The extracted text, or '' when there is none
 */
export const getText = (input: any): string => {
  if (input === null || input === undefined) return ''

  switch (typeof input) {
    case 'string':
      return input
    case 'number':
    case 'boolean':
    case 'bigint':
      return String(input)
    case 'object':
      break
    default:
      return ''
  }

  if (Array.isArray(input)) {
    for (const item of input) {
      const text = getText(item)
      if (text) return text
    }
    return ''
  }

  for (const key of ['CharacterString', 'Anchor', '#text']) {
    const text = getText(input[key])
    if (text) return text
  }
  return ''
}
@@ -0,0 +1,122 @@
1
+ import fs from 'fs'
2
+ import axios from '@data-fair/lib-node/axios.js'
3
+ import type { AxiosRequestConfig } from 'axios'
4
+
5
/** User-facing (French) explanations for the HTTP client errors that get a dedicated message. */
const CLIENT_ERROR_MESSAGES: Record<number, string> = {
  400: 'Requête invalide (400). Les paramètres envoyés sont peut-être incorrects.',
  401: 'Accès refusé (401). Vérifiez le nom d\'utilisateur et le mot de passe dans la configuration.',
  403: 'Accès interdit (403). Vous n\'avez pas les droits nécessaires pour accéder à ce fichier.',
  404: 'Fichier introuvable (404). L\'URL de téléchargement n\'existe plus ou est incorrecte.',
  408: 'Délai d\'attente dépassé (408). Le serveur a mis trop de temps à répondre.',
  410: 'Ressource indisponible (410). Le fichier a été définitivement supprimé.',
  421: 'Requête mal dirigée (421). Le serveur ne peut pas répondre (problème de certificat SSL).',
  429: 'Trop de requêtes (429). Le serveur limite le nombre de téléchargements (rate limit).'
}

/**
 * Downloads a file from a URL to a local path, reporting progress through the logger.
 *
 * The destination file is only created once the server has answered successfully, so a
 * failed request never leaves an empty file or an open file descriptor behind. If the
 * transfer fails midway, both streams are destroyed and the partial file is deleted.
 *
 * @param url - The source URL to download from.
 * @param destPath - The local file system path where the file will be saved.
 * @param label - A label used for logging (the resource ID).
 * @param log - The logger object
 * @param axiosConfig - Optional Axios configuration
 * @returns A promise that resolves to the destination path upon success.
 * @throws Error with a descriptive message for 4xx responses; the original error otherwise.
 */
export async function downloadFileWithProgress (
  url: string,
  destPath: string,
  label: string,
  log: any,
  axiosConfig: AxiosRequestConfig = {}
): Promise<string> {
  const taskName = `download ${label}`
  const logInterval = 500 // Update progress log every 500ms to avoid spamming the logger

  // Issue the GET request; 4xx answers are translated into a logged, readable error.
  const openStream = async () => {
    try {
      return await axios.get(url, { ...axiosConfig, responseType: 'stream' })
    } catch (err: unknown) {
      const status = (err as { response?: { status?: unknown } } | null | undefined)?.response?.status
      if (typeof status === 'number' && status >= 400 && status < 500) {
        const msg = CLIENT_ERROR_MESSAGES[status] ?? `Erreur client non gérée (${status}).`
        await log.error(msg)
        throw new Error(msg)
      }
      throw err
    }
  }

  const response = await openStream()
  const source = response.data

  // Total size from the headers (when announced and readable) for progress calculation
  const announcedLength = parseInt(response.headers['content-length'] ?? '', 10)
  const totalLength = Number.isFinite(announcedLength) ? announcedLength : undefined

  try {
    await log.task(taskName, 'Downloading...', totalLength)
  } catch (err) {
    // Do not keep the HTTP response open when the task could not be registered
    source.destroy()
    throw err
  }

  let downloadedBytes = 0
  let lastLogged = Date.now()

  return await new Promise<string>((resolve, reject) => {
    const writer = fs.createWriteStream(destPath)
    let settled = false

    // Single failure path: stop both streams, delete the partial file, then reject.
    const fail = (err: unknown) => {
      if (settled) return
      settled = true
      source.destroy()
      writer.destroy()
      fs.unlink(destPath, () => reject(err))
    }

    source.on('data', (chunk: Buffer) => {
      downloadedBytes += chunk.length
      const now = Date.now()
      // Only update the log if the interval has passed
      if (now - lastLogged > logInterval) {
        lastLogged = now
        // Intermediate progress is best effort: a logging failure must not break the transfer
        Promise.resolve()
          .then(() => log.progress(taskName, downloadedBytes, totalLength))
          .catch(() => {})
      }
    })

    source.on('error', fail)
    writer.on('error', fail)

    writer.on('finish', () => {
      if (settled) return
      settled = true
      // Report the final byte count, then hand back the path; a failure here rejects
      // the promise instead of leaving it pending.
      Promise.resolve()
        .then(() => log.progress(taskName, downloadedBytes, totalLength))
        .then(() => resolve(destPath), reject)
    })

    // Pipe the download stream directly into the file writer
    source.pipe(writer)
  })
}
@@ -0,0 +1,232 @@
1
+ import axios from '@data-fair/lib-node/axios.js'
2
+ import { getText, asArray } from './common.ts'
3
+ import type { DownloadCandidate } from './types.ts'
4
+
5
/**
 * Checks whether a download URL answers successfully.
 *
 * - Plain links: a HEAD request (3s timeout) must answer with a 2xx/3xx status.
 * - WFS test (`isWFSTest`): a GET limited to one feature (COUNT / MAXFEATURES = 1, 5s timeout)
 *   must answer below 400, must not contain an OGC exception report, and must not answer
 *   XML when a JSON output format was requested.
 *
 * Any thrown error (network failure, invalid URL, rejected status) is reported as `false`.
 *
 * @param url The URL to probe
 * @param log The logger object (not used by this function)
 * @param isWFSTest Whether to run the WFS GetFeature probe instead of the HEAD request
 * @returns true when the URL passed the probe
 */
export const isUrlValid = async (url: string, log: any, isWFSTest = false): Promise<boolean> => {
  try {
    if (!isWFSTest) {
      await axios.head(url, { timeout: 3000, validateStatus: (s) => s >= 200 && s < 400 })
      return true
    }

    const probeUrl = new URL(url)
    probeUrl.searchParams.set('COUNT', '1')
    probeUrl.searchParams.set('MAXFEATURES', '1')

    const answer = await axios.get(probeUrl.toString(), {
      timeout: 5000,
      validateStatus: (status) => status < 500
    })
    if (answer.status >= 400) return false

    const body = String(answer.data)
    const exceptionMarkers = ['ExceptionReport', 'ServiceException']
    if (exceptionMarkers.some((marker) => body.includes(marker))) return false

    // A JSON format was requested but XML came back: the WFS most likely does not support that format
    const contentType = answer.headers['content-type'] || ''
    const requestedFormat = probeUrl.searchParams.get('OUTPUTFORMAT') || ''
    const gotXmlInsteadOfJson = requestedFormat.includes('json') && contentType.includes('xml')
    return !gotXmlInsteadOfJson
  } catch {
    return false
  }
}
40
+
41
/** Returns the (lower-cased, decoded) value of the `outputformat` query parameter, or '' when absent. */
const wfsOutputFormat = (lowerCasedUrl: string): string => {
  const match = /[?&]outputformat=([^&#]*)/.exec(lowerCasedUrl)
  const raw = match?.[1] ?? ''
  try {
    return decodeURIComponent(raw.replace(/\+/g, ' '))
  } catch {
    // Malformed percent-encoding: the raw value is still good enough for keyword matching
    return raw
  }
}

/**
 * Classifies one online resource of the metadata as a download candidate.
 *
 * Rules are evaluated in order and the first match wins; the score expresses the
 * preference used later to sort candidates (higher is better).
 *
 * @param linkWrapper A parsed `onLine` entry (either the `CI_OnlineResource` wrapper or its content)
 * @returns The candidate (url, format, score and, for raw WFS services, the layer name) or null when the link is not usable
 */
const analyzeLink = (linkWrapper: any): DownloadCandidate | null => {
  const link = linkWrapper?.CI_OnlineResource || linkWrapper
  // String(...) guards the string methods below against non-string text values
  const url = String(getText(link?.linkage?.URL))
  if (!url) return null

  const name = String(getText(link?.name))
  const u = url.toLowerCase()
  const p = String(getText(link?.protocol)).toLowerCase()
  const n = name.toLowerCase()

  // 1. WFS GetFeature URL that already states its output format: the format is read
  //    from the outputformat value itself (falling back to the whole URL when it is empty)
  if (u.includes('service=wfs') && u.includes('outputformat=')) {
    const hint = wfsOutputFormat(u) || u
    let format = 'wfs_service'
    if (hint.includes('json')) format = 'geojson'
    else if (hint.includes('csv')) format = 'csv'
    else if (hint.includes('zip') || hint.includes('shape')) format = 'shapefile'
    return { url, format, score: 50 }
  }

  // 2. Shapefile (Zip)
  if (u.includes('shape-zip') || u.endsWith('.zip') || n.includes('shapefile')) {
    return { url, format: 'shapefile', score: 10 }
  }

  // 3. GeoJSON
  if (u.includes('geojson') || p.includes('geo+json') || n.includes('geojson')) {
    return { url, format: 'geojson', score: 8 }
  }

  // 4. CSV (the name is compared case-insensitively, like every other rule)
  if (u.includes('/csv') || u.includes('.csv') || p.includes('text/csv') || n === 'csv') {
    return { url, format: 'csv', score: 6 }
  }

  // 5. KML
  if (u.endsWith('.kml') || p.includes('kml')) {
    return { url, format: 'kml', score: 4 }
  }

  // 6. JSON (GeoJSON URLs were already handled by rule 3)
  if ((u.includes('/json') || u.includes('.json') || p.includes('application/json') || n === 'json') && !u.includes('geojson')) {
    return { url, format: 'json', score: 5 }
  }

  // 7. Raw WFS service: the link name is kept as the layer name
  if (p.includes('wfs') || u.includes('service=wfs')) {
    return { url, format: 'wfs_service', score: 2, layerName: name }
  }

  return null
}
94
+
95
/** Query parameters that are rebuilt from scratch when a GetFeature URL is generated (compared lower-cased). */
const WFS_RESERVED_PARAMS = ['service', 'request', 'version', 'typename', 'typenames', 'outputformat', 'srsname']

/** WFS output formats probed in order of preference (GeoJSON, then Shapefile, CSV and KML). */
const WFS_OUTPUT_FORMATS = [
  { param: 'application/json; subtype=geojson', format: 'geojson' },
  { param: 'geojson', format: 'geojson' },
  { param: 'application/json', format: 'geojson' },
  { param: 'application/vnd.geo+json', format: 'geojson' },
  { param: 'json', format: 'geojson' },
  { param: 'SHAPE-ZIP', format: 'shapefile' },
  { param: 'shapezip', format: 'shapefile' },
  { param: 'application/zip', format: 'shapefile' },
  { param: 'application/x-shapefile', format: 'shapefile' },
  { param: 'csv', format: 'csv' },
  { param: 'text/csv', format: 'csv' },
  { param: 'kml', format: 'kml' },
  { param: 'application/vnd.google-earth.kml+xml', format: 'kml' }
]

/**
 * Parses the CSW metadata to find the best download URL based on heuristics and validation.
 *
 * Steps:
 * 1. collect every online resource of the distribution info and classify it;
 * 2. try the candidates by decreasing score: the first direct link that answers is kept,
 *    a raw WFS service is only remembered as a fallback;
 * 3. for a raw WFS service, build a GetFeature URL and probe the supported output formats.
 *
 * @param metadata The parsed ISO 19139 metadata object
 * @param resourceId The resource ID (used as the WFS type name when the link has no name)
 * @param log The logger object for logging progress and warnings
 * @returns An object containing the best URL and its format, or null if no valid link is found
 */
export const findBestDownloadUrl = async (metadata: any, resourceId: string, log: any): Promise<{ url: string, format: string } | null> => {
  const root = metadata?.MD_Metadata || metadata
  const distributionInfo = root?.distributionInfo?.MD_Distribution
  if (!distributionInfo) return null

  // Flatten the online resources of every transfer option
  const allLinks: any[] = asArray(distributionInfo.transferOptions).flatMap((transfer: any) => {
    const digitalTransfer = transfer?.MD_DigitalTransferOptions || transfer
    return digitalTransfer && digitalTransfer.onLine ? asArray(digitalTransfer.onLine) : []
  })
  if (allLinks.length === 0) return null

  const candidates: DownloadCandidate[] = []
  for (const link of allLinks) {
    const candidate = analyzeLink(link)
    if (candidate) candidates.push(candidate)
  }
  if (candidates.length === 0) return null

  candidates.sort((a, b) => b.score - a.score)

  await log.info(`${candidates.length} liens candidats trouvés. Vérification...`)

  let bestCandidate: DownloadCandidate | null = null

  for (const candidate of candidates) {
    // Prioritize direct links with format hints, but allow WFS if no better option is found
    const isDirectLink = candidate.format !== 'wfs_service' || candidate.url.toLowerCase().includes('outputformat=')
    if (isDirectLink) {
      if (await isUrlValid(candidate.url, log)) {
        bestCandidate = candidate
        await log.info(`Lien direct validé : ${candidate.url}`)
        break
      }
    } else if (!bestCandidate) {
      bestCandidate = candidate
    }
  }

  if (!bestCandidate) {
    await log.warning('Aucun lien n\'a passé le test de validation.')
    return null
  }

  let { url, format } = bestCandidate
  const { layerName } = bestCandidate

  // Links under /api/data/ that do not state a format are requested as CSV
  if (url.includes('/api/data/') && !url.toLowerCase().includes('format=')) {
    const separator = url.includes('?') ? '&' : '?'
    url = `${url}${separator}format=csv`
    format = 'csv'
  }

  if (format === 'wfs_service' && !url.toLowerCase().includes('outputformat=')) {
    await log.info(`Service WFS détecté sur ${url}, test des formats supportés...`)

    // Split on the first '?' only, so nothing after a second '?' is lost
    const queryStart = url.indexOf('?')
    const baseUrl = queryStart === -1 ? url : url.slice(0, queryStart)
    const params = new URLSearchParams(queryStart === -1 ? '' : url.slice(queryStart + 1))

    // Drop the WFS parameters whatever their casing, keep everything else (tokens, map names, ...)
    for (const key of [...params.keys()]) {
      if (WFS_RESERVED_PARAMS.includes(key.toLowerCase())) params.delete(key)
    }

    params.set('SERVICE', 'WFS')
    params.set('VERSION', '2.0.0')
    params.set('REQUEST', 'GetFeature')
    params.set('TYPENAMES', layerName || resourceId)

    let found: { url: string, format: string } | null = null
    for (const candidateFormat of WFS_OUTPUT_FORMATS) {
      const testParams = new URLSearchParams(params)
      testParams.set('OUTPUTFORMAT', candidateFormat.param)
      const testUrl = `${baseUrl}?${testParams.toString()}`
      if (await isUrlValid(testUrl, log, true)) {
        await log.info(`Format WFS supporté trouvé : ${candidateFormat.param}`)
        found = { url: testUrl, format: candidateFormat.format }
        break // Stop at the first valid format found, prioritizing GeoJSON and Shapefile over others
      }
    }

    if (!found) {
      await log.error('Ce service WFS ne propose aucun format supporté par DataFair (GeoJSON, Shapefile, KML, CSV)')
      return null
    }

    url = found.url
    await log.info(`URL finale WFS : ${url}`)
    format = found.format
  }

  return { url, format }
}
@@ -0,0 +1,18 @@
1
/** Value of a summary-record field: absent, a single string, or several strings. */
type CswValue = string | string[] | undefined

/** Fields read from a CSW summary record; every field is optional. */
export interface CswRecord {
  identifier?: CswValue
  title?: CswValue
  description?: CswValue
  modified?: CswValue
  format?: CswValue
  protocol?: CswValue
  references?: Array<{ scheme: string, value: string }>
}
12
+
13
/** A download link found in the metadata, together with its detected format and preference score. */
export interface DownloadCandidate {
  /** Link URL as found in the metadata */
  url: string
  /** Detected format identifier */
  format: string
  /** Preference score used to order candidates */
  score: number
  /** Optional layer name attached to the link */
  layerName?: string
}
package/package.json ADDED
@@ -0,0 +1,56 @@
1
+ {
2
+ "name": "@data-fair/catalog-csw",
3
+ "description": "A simple CSW plugin for the Data Fair catalogs service.",
4
+ "version": "0.1.0",
5
+ "main": "index.ts",
6
+ "type": "module",
7
+ "scripts": {
8
+ "build-types": "df-build-types ./",
9
+ "check-types": "tsc",
10
+ "lint": "eslint .",
11
+ "prepare": "husky || true",
12
+ "test-base": "NODE_ENV=test EVENTS_LOG_LEVEL=alert node --disable-warning=ExperimentalWarning --test-force-exit --test-concurrency=1 --test",
13
+ "test-only": "npm run test-base -- --test-only test-it/*.ts",
14
+ "test": "npm run test-base test-it/*.ts",
15
+ "quality": "npm run lint && npm run build-types && npm run check-types && npm run test && npm audit --omit=dev --audit-level=critical",
16
+ "prepublishOnly": "npm run build-types"
17
+ },
18
+ "files": [
19
+ "./lib/**",
20
+ "./types/**",
21
+ "index.ts"
22
+ ],
23
+ "imports": {
24
+ "#types": "./types/index.ts",
25
+ "#type/*": "./types/*"
26
+ },
27
+ "keywords": [
28
+ "data-fair-catalogs-plugin"
29
+ ],
30
+ "license": "MIT",
31
+ "dependencies": {
32
+ "@data-fair/lib-validation": "^1.0.2",
33
+ "@data-fair/lib-node": "^2.8.1",
34
+ "@data-fair/lib-utils": "^1.6.0",
35
+ "fast-xml-parser": "^5.3.5",
36
+ "prom-client": "^15.1.3"
37
+ },
38
+ "devDependencies": {
39
+ "@commitlint/cli": "^19.8.0",
40
+ "@commitlint/config-conventional": "^19.8.0",
41
+ "@data-fair/lib-types-builder": "^1.8.0",
42
+ "@data-fair/types-catalogs": "^0.6.0",
43
+ "@types/fs-extra": "^11.0.4",
44
+ "chalk": "^5.4.1",
45
+ "dayjs": "^1.11.13",
46
+ "draftlog": "^1.0.13",
47
+ "eslint": "^9.25.1",
48
+ "fs-extra": "^11.3.0",
49
+ "husky": "^9.1.7",
50
+ "neostandard": "^0.12.1",
51
+ "typescript": "^5.8.3"
52
+ },
53
+ "relativeDependencies": {
54
+ "@data-fair/types-catalogs": "../../catalogs/types-catalogs"
55
+ }
56
+ }
@@ -0,0 +1,20 @@
1
+
2
+ export const schemaExports: string[]
3
+
4
+ // see https://github.com/bcherny/json-schema-to-typescript/issues/439 if some types are not exported
5
/**
 * URL of the CSW service to connect to (GeoNetwork, ...)
 */
export type CSWServiceURL = string;

/** Configuration of a CSW catalog. */
export type CSWConfig = {
  url: CSWServiceURL;
}

// The three declarations below must reference `CSWConfig`, the type declared above;
// `CswConfig` (different casing) is not declared anywhere in this file.
export declare function validate(data: any): data is CSWConfig
export declare function assertValid(data: any, options?: import('@data-fair/lib-validation').AssertValidOptions): asserts data is CSWConfig
export declare function returnValid(data: any, options?: import('@data-fair/lib-validation').AssertValidOptions): CSWConfig

export declare const schema: any
20
+
@@ -0,0 +1,50 @@
1
+ /* eslint-disable */
2
+
3
+
4
+ import validate from './validate.js'
5
+ import { assertValid as assertValidGeneric } from '@data-fair/lib-validation'
6
+
7
+ export const schemaExports = [
8
+ "types",
9
+ "validate",
10
+ "schema"
11
+ ]
12
+
13
+ export { validate } from './validate.js'
14
/**
 * Checks `data` against the compiled catalog-config validator by delegating to the
 * generic `assertValid` helper of @data-fair/lib-validation.
 */
export function assertValid(data, options) {
  assertValidGeneric(validate, data, options)
}

/**
 * Runs `assertValid` on `data`, then returns the very same `data` reference.
 */
export function returnValid(data, options) {
  assertValid(data, options)
  return data
}
21
+
22
+ export const schema = {
23
+ "$id": "https://github.com/data-fair/catalog-csw/catalog-config",
24
+ "x-exports": [
25
+ "types",
26
+ "validate",
27
+ "schema"
28
+ ],
29
+ "title": "CSWConfig",
30
+ "type": "object",
31
+ "additionalProperties": false,
32
+ "properties": {
33
+ "url": {
34
+ "title": "CSW service URL",
35
+ "x-i18n-title": {
36
+ "fr": "URL du service CSW"
37
+ },
38
+ "description": "URL of the CSW service to connect to (GeoNetwork, ...)",
39
+ "x-i18n-description": {
40
+ "fr": "URL du service CSW auquel se connecter (GeoNetwork, ...)"
41
+ },
42
+ "type": "string",
43
+ "format": "uri",
44
+ "default": "https://geobretagne.fr/geonetwork/srv/fre/csw"
45
+ }
46
+ },
47
+ "required": [
48
+ "url"
49
+ ]
50
+ }
@@ -0,0 +1,76 @@
1
+ /* eslint-disable */
2
+ // @ts-nocheck
3
+
4
+ import { fullFormats } from "ajv-formats/dist/formats.js";
5
+ "use strict";
6
+ export const validate = validate14;
7
+ export default validate14;
8
+ const schema16 = {"$id":"https://github.com/data-fair/catalog-csw/catalog-config","x-exports":["types","validate","schema"],"title":"CSWConfig","type":"object","additionalProperties":false,"properties":{"url":{"title":"CSW service URL","x-i18n-title":{"fr":"URL du service CSW"},"description":"URL of the CSW service to connect to (GeoNetwork, ...)","x-i18n-description":{"fr":"URL du service CSW auquel se connecter (GeoNetwork, ...)"},"type":"string","format":"uri","default":"https://geobretagne.fr/geonetwork/srv/fre/csw"}},"required":["url"]};
9
+ const formats0 = fullFormats.uri;
10
+
11
/**
 * Validator for the catalog-config schema: `data` must be a plain object whose only
 * property is a required `url` string matching the "uri" format.
 * Every violation is collected (no early exit); the list, or null when there is none,
 * is stored on `validate14.errors`.
 * Returns true when no violation was found.
 */
function validate14(data, {instancePath="", parentData, parentDataProperty, rootData=data}={}){
  /*# sourceURL="https://github.com/data-fair/catalog-csw/catalog-config" */;
  const collected = [];
  // Records one violation using the same property order as the other error objects
  const report = (path, schemaPath, keyword, params, message) => {
    collected.push({instancePath: path, schemaPath, keyword, params, message});
  };

  const isPlainObject = data && typeof data == "object" && !Array.isArray(data);
  if(!isPlainObject){
    report(instancePath, "#/type", "type", {type: "object"}, "must be object");
  }
  else {
    if(data.url === undefined){
      report(instancePath, "#/required", "required", {missingProperty: "url"}, "must have required property '"+"url"+"'");
    }
    for(const key0 in data){
      if(key0 !== "url"){
        report(instancePath, "#/additionalProperties", "additionalProperties", {additionalProperty: key0}, "must NOT have additional properties");
      }
    }
    if(data.url !== undefined){
      const data0 = data.url;
      if(typeof data0 !== "string"){
        report(instancePath+"/url", "#/properties/url/type", "type", {type: "string"}, "must be string");
      }
      else if(!(formats0(data0))){
        report(instancePath+"/url", "#/properties/url/format", "format", {format: "uri"}, "must match format \""+"uri"+"\"");
      }
    }
  }

  validate14.errors = collected.length > 0 ? collected : null;
  return collected.length === 0;
}
@@ -0,0 +1 @@
1
+ export * from './.type/index.js'
@@ -0,0 +1,27 @@
1
+ {
2
+ "$id": "https://github.com/data-fair/catalog-csw/catalog-config",
3
+ "x-exports": [
4
+ "types",
5
+ "validate",
6
+ "schema"
7
+ ],
8
+ "title": "CSWConfig",
9
+ "type": "object",
10
+ "additionalProperties": false,
11
+ "properties": {
12
+ "url": {
13
+ "title": "CSW service URL",
14
+ "x-i18n-title": {
15
+ "fr": "URL du service CSW"
16
+ },
17
+ "description": "URL of the CSW service to connect to (GeoNetwork, ...)",
18
+ "x-i18n-description": {
19
+ "fr": "URL du service CSW auquel se connecter (GeoNetwork, ...)"
20
+ },
21
+ "type": "string",
22
+ "format": "uri",
23
+ "default": "https://geobretagne.fr/geonetwork/srv/fre/csw"
24
+ }
25
+ },
26
+ "required": ["url"]
27
+ }
package/types/index.ts ADDED
@@ -0,0 +1 @@
1
+ export { schema as configSchema, assertValid as assertConfigValid, type CSWConfig } from './catalogConfig/index.ts'