@data-fair/catalog-csw 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -4,10 +4,8 @@ import { type CSWCapabilities, capabilities } from './lib/capabilities.ts'
4
4
 
5
5
  const plugin: CatalogPlugin<CSWConfig, CSWCapabilities> = {
6
6
  async prepare (context) {
7
- if (context.catalogConfig.url) {
8
- context.catalogConfig.url = context.catalogConfig.url.trim()
9
- }
10
- return context
7
+ const prepare = (await import('./lib/prepare.ts')).default
8
+ return prepare(context)
11
9
  },
12
10
  async list (context) {
13
11
  const { list } = await import('./lib/list.ts')
@@ -21,6 +19,7 @@ const plugin: CatalogPlugin<CSWConfig, CSWCapabilities> = {
21
19
 
22
20
  metadata: {
23
21
  title: 'CSW',
22
+ thumbnailPath: './lib/resource/logo.svg',
24
23
  i18n: {
25
24
  en: { description: 'Uses CSW 2.0.2 to import datasets (GeoNetwork, ...)' },
26
25
  fr: { description: 'Utilise du CSW 2.0.2 pour importer des datasets (GeoNetwork, ...)' }
@@ -7,6 +7,7 @@ import type { Capability } from '@data-fair/types-catalogs'
7
7
  */
8
8
  export const capabilities = [
9
9
  'search',
10
+ 'thumbnail',
10
11
  'pagination',
11
12
 
12
13
  'import',
package/lib/list.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { CatalogPlugin, ListContext } from '@data-fair/types-catalogs'
1
+ import type { CatalogPlugin, ListContext, Folder } from '@data-fair/types-catalogs'
2
2
  import type { CSWConfig } from '#types'
3
3
  import type { CswRecord } from './utils/types.ts'
4
4
  import { XMLParser } from 'fast-xml-parser'
@@ -21,58 +21,84 @@ const parser = new XMLParser({
21
21
  */
22
22
  export const list = async (config: ListContext<CSWConfig, typeof capabilities>): ReturnType<CatalogPlugin<CSWConfig>['list']> => {
23
23
  const { catalogConfig, params } = config
24
+ const currentFolderId = params?.currentFolderId
25
+
26
+ if (!currentFolderId) {
27
+ const cswBody = `
28
+ <csw:GetRecords
29
+ xmlns:csw="http://www.opengis.net/cat/csw/2.0.2"
30
+ service="CSW" version="2.0.2" resultType="results"
31
+ startPosition="1" maxRecords="100"
32
+ outputSchema="http://www.opengis.net/cat/csw/2.0.2">
33
+ <csw:Query typeNames="csw:Record">
34
+ <csw:ElementSetName>brief</csw:ElementSetName>
35
+ </csw:Query>
36
+ </csw:GetRecords>`
37
+
38
+ try {
39
+ const response = await axios.post(catalogConfig.url, cswBody, {
40
+ headers: { 'Content-Type': 'application/xml' }
41
+ })
42
+
43
+ const parsed = parser.parse(response.data)
44
+ const root = parsed.GetRecordsResponse || parsed['csw:GetRecordsResponse']
45
+ const searchResults = root?.SearchResults || root?.['csw:SearchResults']
46
+ const rawRecords = searchResults?.BriefRecord || searchResults?.SummaryRecord || searchResults?.Record || []
47
+ const records = asArray(rawRecords)
48
+
49
+ const typesSet = new Set<string>()
50
+ records.forEach((record: any) => {
51
+ const typeStr = getText(record.type || record['dc:type'])
52
+ if (typeStr && typeStr !== 'unknown') {
53
+ typesSet.add(typeStr.toLowerCase())
54
+ }
55
+ })
56
+ const folders = Array.from(typesSet).map(type => ({
57
+ id: type,
58
+ title: type.toUpperCase(),
59
+ type: 'folder'
60
+ } as Folder))
61
+
62
+ if (folders.length === 0) {
63
+ folders.push({ id: 'all', title: 'TOUS LES DOCUMENTS', type: 'folder' as const })
64
+ }
65
+ return {
66
+ count: folders.length,
67
+ results: folders,
68
+ path: []
69
+ }
70
+ } catch (error: any) {
71
+ console.error('Erreur GetDomain:', error.message)
72
+ return { count: 0, results: [], path: [] }
73
+ }
74
+ }
24
75
  const query = params?.q ? params.q.trim() : ''
25
76
  const page = Number(params?.page || 1)
26
77
  const size = Number(params?.size || 10)
27
78
  const startPosition = (page - 1) * size + 1
28
79
 
29
- // Bloc of XML filters to find records with relevant formats in their metadata (WFS, GeoJSON, JSON, CSV, ZIP)
30
- const formatFilter = `
31
- <ogc:Or>
32
- <ogc:PropertyIsLike matchCase="false" wildCard="%" singleChar="_" escapeChar="\\">
33
- <ogc:PropertyName>AnyText</ogc:PropertyName>
34
- <ogc:Literal>%WFS%</ogc:Literal>
35
- </ogc:PropertyIsLike>
36
-
37
- <ogc:PropertyIsLike matchCase="false" wildCard="%" singleChar="_" escapeChar="\\">
38
- <ogc:PropertyName>AnyText</ogc:PropertyName>
39
- <ogc:Literal>%JSON%</ogc:Literal>
40
- </ogc:PropertyIsLike>
41
-
42
- <ogc:PropertyIsLike matchCase="false" wildCard="%" singleChar="_" escapeChar="\\">
43
- <ogc:PropertyName>AnyText</ogc:PropertyName>
44
- <ogc:Literal>%CSV%</ogc:Literal>
45
- </ogc:PropertyIsLike>
46
-
47
- <ogc:PropertyIsLike matchCase="false" wildCard="%" singleChar="_" escapeChar="\\">
48
- <ogc:PropertyName>AnyText</ogc:PropertyName>
49
- <ogc:Literal>%ZIP%</ogc:Literal>
50
- </ogc:PropertyIsLike>
51
- </ogc:Or>`
52
-
53
- const filterBlock = query
80
+ const typeFilter = `
81
+ <ogc:PropertyIsEqualTo>
82
+ <ogc:PropertyName>dc:type</ogc:PropertyName> <ogc:Literal>${currentFolderId}</ogc:Literal>
83
+ </ogc:PropertyIsEqualTo>`
84
+
85
+ const filterContent = query
54
86
  ? `
55
- <ogc:And>
56
- <ogc:PropertyIsLike wildCard="%" singleChar="_" escapeChar="\\\\">
57
- <ogc:PropertyName>AnyText</ogc:PropertyName>
58
- <ogc:Literal>%${query}%</ogc:Literal>
59
- </ogc:PropertyIsLike>
60
- ${formatFilter}
61
- </ogc:And>`
62
- : formatFilter
87
+ <ogc:And>
88
+ ${typeFilter}
89
+ <ogc:PropertyIsLike wildCard="%" singleChar="_" escapeChar="\\\\">
90
+ <ogc:PropertyName>AnyText</ogc:PropertyName>
91
+ <ogc:Literal>%${query}%</ogc:Literal>
92
+ </ogc:PropertyIsLike>
93
+ </ogc:And>`
94
+ : typeFilter
63
95
 
64
96
  const constraintBlock = `
65
97
  <csw:Constraint version="1.1.0">
66
98
  <ogc:Filter>
67
- ${filterBlock}
99
+ ${filterContent}
68
100
  </ogc:Filter>
69
- </csw:Constraint>
70
- <ogc:SortBy xmlns:ogc="http://www.opengis.net/ogc">
71
- <ogc:SortProperty>
72
- <ogc:PropertyName>RevisionDate</ogc:PropertyName>
73
- <ogc:SortOrder>DESC</ogc:SortOrder>
74
- </ogc:SortProperty>
75
- </ogc:SortBy>`
101
+ </csw:Constraint>`
76
102
 
77
103
  const cswBody = `
78
104
  <csw:GetRecords
@@ -119,20 +145,21 @@ export const list = async (config: ListContext<CSWConfig, typeof capabilities>):
119
145
  const titleRecord = getText(record.title || record['dc:title']) || 'Sans titre'
120
146
  const rawDateObj = record.modified || record.date || record.dateStamp || record.RevisionDate
121
147
  const dateRaw = getText(rawDateObj)
122
- const type = getText(record.type || record['dc:type'])
123
148
  return {
124
149
  id: identifier,
125
150
  title: titleRecord,
126
151
  updatedAt: dateRaw || new Date().toISOString(),
127
152
  type: 'resource',
128
- format: type || 'unknown'
153
+ format: currentFolderId
129
154
  }
130
155
  }) as ResourceList
131
156
 
132
157
  return {
133
158
  count: totalCount,
134
159
  results: listResults,
135
- path: []
160
+ path: [
161
+ { id: currentFolderId, title: currentFolderId.toUpperCase(), type: 'folder' }
162
+ ]
136
163
  }
137
164
  } catch (error: any) {
138
165
  console.error('ERREUR :', error.message)
package/lib/prepare.ts ADDED
@@ -0,0 +1,42 @@
1
+ import dns from 'dns/promises'
2
+ import { URL } from 'url'
3
+ import type { CSWConfig } from '#types'
4
+ import type { CSWCapabilities } from './capabilities.ts'
5
+ import type { PrepareContext } from '@data-fair/types-catalogs'
6
+
7
/**
 * Address ranges a catalog URL must never resolve to: IPv4 "this network",
 * private, shared (CGNAT), loopback and link-local ranges, plus their IPv6
 * counterparts (unspecified, loopback, unique-local, link-local).
 */
const INTERNAL_SUBNETS: ReadonlyArray<readonly [string, number, 'ipv4' | 'ipv6']> = [
  ['0.0.0.0', 8, 'ipv4'],
  ['10.0.0.0', 8, 'ipv4'],
  ['100.64.0.0', 10, 'ipv4'],
  ['127.0.0.0', 8, 'ipv4'],
  ['169.254.0.0', 16, 'ipv4'],
  ['172.16.0.0', 12, 'ipv4'],
  ['192.168.0.0', 16, 'ipv4'],
  ['::', 128, 'ipv6'],
  ['::1', 128, 'ipv6'],
  ['fc00::', 7, 'ipv6'],
  ['fe80::', 10, 'ipv6']
]

/**
 * Validates and normalizes the catalog configuration before it is stored.
 *
 * The URL is trimmed, parsed, restricted to HTTP(S), and its host is resolved
 * so that catalogs pointing at an internal network are refused.
 *
 * @param context - Prepare context; only `catalogConfig` is read.
 * @returns An object holding the same `catalogConfig`, whose `url` has been
 *   replaced by its normalized form.
 * @throws Error when the URL is missing, malformed, not HTTP(S), cannot be
 *   resolved, or resolves to an internal address.
 */
const prepare = async ({ catalogConfig }: PrepareContext<CSWConfig, CSWCapabilities>) => {
  if (!catalogConfig || !catalogConfig.url) {
    throw new Error("L'URL du catalogue est obligatoire.")
  }

  let url: URL
  try {
    url = new URL(catalogConfig.url.trim())
  } catch {
    throw new Error("L'URL fournie n'est pas valide.")
  }

  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    throw new Error('Seuls les protocoles HTTP et HTTPS sont autorisés.')
  }

  // URL.hostname keeps the square brackets of IPv6 literals; the resolver does not accept them.
  const hostname = url.hostname.replace(/^\[|\]$/g, '')

  let resolved: Array<{ address: string, family: number }>
  try {
    // Ask for every address of both families: checking only the first IPv4
    // record lets a host with several records slip a private address through.
    resolved = await dns.lookup(hostname, { all: true })
  } catch {
    throw new Error(`Impossible de résoudre l'adresse : ${url.hostname}`)
  }
  if (resolved.length === 0) {
    throw new Error(`Impossible de résoudre l'adresse : ${url.hostname}`)
  }

  // Imported locally so this block does not depend on a new top-of-file import.
  const { BlockList } = await import('node:net')
  const internalNetworks = new BlockList()
  for (const [network, prefix, family] of INTERNAL_SUBNETS) {
    internalNetworks.addSubnet(network, prefix, family)
  }

  // The resolution failure and the policy rejection are raised from separate
  // places, so no error-message sniffing is needed to tell them apart.
  const internal = resolved.find(({ address, family }) =>
    internalNetworks.check(address, family === 6 ? 'ipv6' : 'ipv4')
  )
  if (internal) {
    throw new Error(`L'URL est interdite car elle pointe vers un réseau interne (${internal.address}).`)
  }

  catalogConfig.url = url.toString()
  return {
    catalogConfig
  }
}

export default prepare
@@ -0,0 +1 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" width="24" height="24" fill="#000000" style="opacity:1;"><path d="M49.953 5A45 45 0 0 0 7.758 34.498H2.5A3 3 0 0 0-.502 37.5v25A3 3 0 0 0 2.5 65.502h5.256A45 45 0 0 0 50 95a45 45 0 0 0 42.242-29.498H97.5a3 3 0 0 0 3.002-3.002v-25a3 3 0 0 0-3.002-3.002h-5.256A45 45 0 0 0 50 5zm2.297 5.113c4.74.807 9.271 4.713 12.84 11.194c1.15 2.089 2.18 4.433 3.068 6.974H52.25zm-4.5.178v17.99H32.676c.889-2.541 1.916-4.885 3.066-6.974c3.36-6.1 7.571-9.915 12.008-11.016M37.383 11.51c-2.092 2.116-3.971 4.698-5.584 7.627c-1.512 2.745-2.813 5.819-3.881 9.144h-12.11A40.52 40.52 0 0 1 37.384 11.51m26.469.416c8.457 3.07 15.586 8.88 20.34 16.355H72.91c-1.066-3.326-2.365-6.4-3.877-9.144c-1.509-2.74-3.251-5.174-5.181-7.211M13.33 32.78h13.328c-.135.561-.246 1.143-.367 1.717h-13.71a39 39 0 0 1 .75-1.717m17.977 0H47.75v1.717H30.908c.131-.574.253-1.158.399-1.717m20.943 0h17.275c.146.56.268 1.143.399 1.717H52.25zm21.92 0h12.5c.265.565.512 1.138.75 1.717H74.535c-.12-.574-.23-1.156-.365-1.717m-46.861 9.065q3.188 0 5.007 1.166q1.83 1.166 2.717 3.584l-4.605.925q-.242-.697-.506-1.019a2.94 2.94 0 0 0-1.07-.834a3.35 3.35 0 0 0-1.416-.291q-1.773 0-2.717 1.291q-.714.958-.715 3.01q.001 2.541.853 3.49q.853.937 2.395.937q1.497.001 2.256-.76q.771-.76 1.117-2.208l4.568 1.25q-.46 1.74-1.449 2.906a6.2 6.2 0 0 1-2.465 1.76q-1.46.594-3.728.593q-2.752 0-4.502-.718q-1.738-.729-3.004-2.551t-1.266-4.668q0-3.791 2.221-5.822q2.233-2.041 6.309-2.041m19.523 0q3.236 0 4.928 1.093q1.703 1.084 2.025 3.457l-4.914.262q-.196-1.03-.83-1.5q-.621-.468-1.727-.469q-.909 0-1.369.354q-.46.344-.46.844q0 .365.38.656q.368.302 1.748.562q3.42.667 4.893 1.354q1.486.677 2.154 1.687q.679 1.012.678 2.262q0 1.468-.897 2.707q-.897 1.24-2.51 1.887q-1.612.635-4.064.635q-4.305 0-5.963-1.5q-1.657-1.5-1.877-3.813l4.963-.281q.162 1.094.656 1.666q.806.928 2.301.928q1.118 0 
1.717-.469q.61-.48.61-1.104q0-.593-.577-1.062q-.575-.47-2.67-.887q-3.43-.697-4.892-1.853q-1.475-1.156-1.475-2.948q0-1.177.75-2.218q.76-1.052 2.268-1.647q1.519-.604 4.154-.603m10.604.26h4.949l1.785 8.53l2.601-8.53h4.94l2.611 8.53l1.785-8.53h4.928l-3.719 15.271h-5.11l-2.96-9.615l-2.947 9.615h-5.112zM12.58 65.501h13.524c.112.573.214 1.154.341 1.715H13.33a39 39 0 0 1-.75-1.715m18.129 0H47.75v1.715H31.082c-.137-.56-.25-1.142-.373-1.715m21.541 0h17.873c-.123.573-.236 1.155-.373 1.715h-17.5zm22.473 0H87.42a39 39 0 0 1-.75 1.715H74.38c.128-.561.23-1.142.343-1.715m-58.914 6.215h11.824c1.117 3.675 2.518 7.056 4.166 10.049c1.294 2.35 2.762 4.472 4.369 6.316c-8.466-3.07-15.603-8.884-20.36-16.365m16.554 0H47.75v18.719c-.277-.016-.55-.044-.826-.065c-4.132-1.35-8.032-5.057-11.182-10.777c-1.285-2.335-2.424-4.984-3.379-7.877m19.887 0h16.219c-.955 2.893-2.094 5.542-3.38 7.877c-3.084 5.602-6.888 9.278-10.925 10.695q-.951.096-1.914.147zm20.947 0h10.994a40.56 40.56 0 0 1-19.105 15.877c1.443-1.728 2.766-3.684 3.947-5.828c1.648-2.993 3.049-6.373 4.164-10.05" color="currentColor"/></svg>
@@ -15,10 +15,12 @@ export const isUrlValid = async (url: string, log: any, isWFSTest = false): Prom
15
15
  })
16
16
 
17
17
  if (response.status >= 400) {
18
+ log.warning(`URL non valide : ${url}`)
18
19
  return false
19
20
  }
20
21
  const content = String(response.data)
21
22
  if (content.includes('ExceptionReport') || content.includes('ServiceException')) {
23
+ log.warning(`URL non valide : ${url}`)
22
24
  return false
23
25
  }
24
26
  // if the requested format is JSON but the response is XML, it's likely that the WFS doesn't support the requested format
@@ -34,10 +36,120 @@ export const isUrlValid = async (url: string, log: any, isWFSTest = false): Prom
34
36
  return true
35
37
  }
36
38
  } catch (err) {
39
+ log.warning(`URL non valide : ${url}`)
37
40
  return false
38
41
  }
39
42
  }
40
43
 
44
/** WFS query parameters that are rebuilt from scratch for the GetFeature probe. */
const WFS_RESERVED_PARAMS = ['service', 'request', 'version', 'typename', 'typenames', 'outputformat', 'srsname']

/**
 * OUTPUTFORMAT values to probe, in priority order, with the format label
 * returned for each of them.
 */
const WFS_FORMATS_TO_TRY: ReadonlyArray<{ param: string, format: string }> = [
  { param: 'application/json; subtype=geojson', format: 'geojson' },
  { param: 'geojson', format: 'geojson' },
  { param: 'application/json', format: 'geojson' },
  { param: 'application/vnd.geo+json', format: 'geojson' },
  { param: 'json', format: 'geojson' },
  { param: 'SHAPE-ZIP', format: 'shapefile' },
  { param: 'shapezip', format: 'shapefile' },
  { param: 'application/zip', format: 'shapefile' },
  { param: 'application/x-shapefile', format: 'shapefile' },
  { param: 'csv', format: 'csv' },
  { param: 'text/csv', format: 'csv' },
  { param: 'kml', format: 'kml' },
  { param: 'application/vnd.google-earth.kml+xml', format: 'kml' }
]

/**
 * Builds WFS 2.0.0 GetFeature URLs from a WFS endpoint and probes each
 * candidate OUTPUTFORMAT with `isUrlValid` until one is accepted.
 *
 * @param originalUrl - URL of the WFS service; non-WFS query parameters are kept.
 * @param resourceId - Used as TYPENAMES when `layerName` is empty.
 * @param layerName - Layer to request, when known.
 * @param log - Logger exposing `info` and `error`.
 * @returns The first accepted URL with its format label, or `null` when no
 *   candidate format is accepted.
 */
const negotiateWfsFormat = async (
  originalUrl: string,
  resourceId: string,
  layerName: string | null | undefined,
  log: any
): Promise<{ url: string; format: string } | null> => {
  log.info(`Service WFS détecté sur ${originalUrl}, test des formats supportés...`)

  // A fragment is never sent to the server; drop it so it cannot leak into a parameter value.
  const hashIndex = originalUrl.indexOf('#')
  const withoutFragment = hashIndex === -1 ? originalUrl : originalUrl.slice(0, hashIndex)

  // Split on the FIRST '?' only: '?' is a legal character inside a query string,
  // so splitting on every occurrence would silently drop the rest of the query.
  const queryIndex = withoutFragment.indexOf('?')
  const baseUrl = queryIndex === -1 ? withoutFragment : withoutFragment.slice(0, queryIndex)
  const existingQuery = queryIndex === -1 ? '' : withoutFragment.slice(queryIndex + 1)

  const params = new URLSearchParams(existingQuery)
  // Snapshot the keys first so entries are not removed while iterating.
  for (const key of [...params.keys()]) {
    if (WFS_RESERVED_PARAMS.includes(key.toLowerCase())) {
      params.delete(key)
    }
  }

  params.set('SERVICE', 'WFS')
  params.set('VERSION', '2.0.0')
  params.set('REQUEST', 'GetFeature')
  params.set('TYPENAMES', layerName || resourceId)

  // Probes run one at a time on purpose: the list is ordered by preference and
  // the first accepted format wins.
  for (const candidate of WFS_FORMATS_TO_TRY) {
    const testParams = new URLSearchParams(params)
    testParams.set('OUTPUTFORMAT', candidate.param)
    const testUrl = `${baseUrl}?${testParams.toString()}`
    if (await isUrlValid(testUrl, log, true)) {
      log.info(`Format WFS supporté trouvé : ${candidate.param}. URL finale WFS : ${testUrl}`)
      return {
        url: testUrl,
        format: candidate.format
      }
    }
  }

  log.error('Ce service WFS ne propose aucun format supporté par DataFair (GeoJSON, Shapefile, KML, CSV)')
  return null
}
101
+
102
/**
 * Content-Type markers and the format label each one maps to, evaluated in
 * order. Specific media types come first: several of them contain the
 * substring "xml" (e.g. "openxmlformats", "gpx+xml") and would otherwise be
 * swallowed by the generic XML rule at the end.
 */
const CONTENT_TYPE_FORMATS: ReadonlyArray<readonly [string, readonly string[]]> = [
  ['xlsx', ['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet']],
  ['xls', ['application/vnd.ms-excel']],
  ['ods', ['application/vnd.oasis.opendocument.spreadsheet']],
  ['gpx', ['application/gpx+xml', 'gpx']],
  ['kmz', ['application/vnd.google-earth.kmz', 'kmz']],
  ['geojson', ['application/json', 'application/geo+json']],
  ['shapefile', ['application/zip', 'application/x-zip-compressed']],
  ['csv', ['text/csv', 'application/csv']],
  ['tsv', ['text/tab-separated-values', 'text/tsv']],
  // NOTE(review): any remaining XML payload is labelled "kml", as before — confirm this is intended.
  ['kml', ['kml', 'xml']]
]

/**
 * Guesses a resource format from the Content-Type returned by a HEAD request.
 *
 * @param url - URL to probe.
 * @param log - Logger exposing `warning`.
 * @returns The format label matching the Content-Type, or `null` when the
 *   header is missing, matches no known format, or the request fails
 *   (a warning is logged in the failure case).
 */
const detectFormatFromHeaders = async (url: string, log: any): Promise<string | null> => {
  try {
    const response = await axios.head(url, {
      timeout: 5000,
      validateStatus: (status: number) => status < 400
    })
    const rawContentType = response.headers['content-type']
    if (!rawContentType) return null

    // Media types are case-insensitive; normalize before substring matching.
    const contentType = String(rawContentType).toLowerCase()
    const match = CONTENT_TYPE_FORMATS.find(([, markers]) =>
      markers.some((marker) => contentType.includes(marker))
    )
    return match ? match[0] : null
  } catch {
    log.warning(`Impossible de détecter le format pour ${url} (Erreur HTTP).`)
    return null
  }
}
152
+
41
153
  const analyzeLink = (linkWrapper: any): DownloadCandidate | null => {
42
154
  const link = linkWrapper.CI_OnlineResource || linkWrapper
43
155
  const url = getText(link.linkage?.URL)
@@ -59,14 +171,18 @@ const analyzeLink = (linkWrapper: any): DownloadCandidate | null => {
59
171
  return { url, format, score }
60
172
  }
61
173
 
174
+ if (u.includes('/api/data/') || u.includes('/api/records/')) {
175
+ return { url, format: 'shapefile', score: 11 }
176
+ }
177
+
62
178
  // 2. Shapefile (Zip)
63
179
  if (u.includes('shape-zip') || u.endsWith('.zip') || n.includes('shapefile')) {
64
- return { url, format: 'shapefile', score: 10 }
180
+ return { url, format: 'shapefile', score: 8 }
65
181
  }
66
182
 
67
183
  // 3. GeoJSON
68
184
  if (u.includes('geojson') || p.includes('geo+json') || n.includes('geojson')) {
69
- return { url, format: 'geojson', score: 8 }
185
+ return { url, format: 'geojson', score: 10 }
70
186
  }
71
187
 
72
188
  // 4. CSV
@@ -89,7 +205,7 @@ const analyzeLink = (linkWrapper: any): DownloadCandidate | null => {
89
205
  return { url, format: 'wfs_service', score: 2, layerName: name }
90
206
  }
91
207
 
92
- return null
208
+ return { url, format: 'unknown', score: 1 }
93
209
  }
94
210
 
95
211
  /**
@@ -138,10 +254,20 @@ export const findBestDownloadUrl = async (metadata: any, resourceId: string, log
138
254
  log.info(`${candidates.length} liens candidats trouvés. Vérification...`)
139
255
 
140
256
  let bestCandidate: DownloadCandidate | null = null
141
-
142
257
  for (const candidate of candidates) {
143
- // Prioritize direct links with format hints, but allow WFS if no better option is found
144
- if (candidate.format !== 'wfs_service' || candidate.url.toLowerCase().includes('outputformat=')) {
258
+ if (candidate.format === 'wfs_service') {
259
+ log.info('lien WFS detecté, validation en cours...')
260
+ if (await isUrlValid(candidate.url, log)) {
261
+ bestCandidate = candidate
262
+ log.info(`Lien WFS validé : ${candidate.url}`)
263
+ break
264
+ } else {
265
+ const wfsResult = await negotiateWfsFormat(candidate.url, resourceId, candidate.layerName, log)
266
+ if (wfsResult) {
267
+ return wfsResult
268
+ }
269
+ }
270
+ } else if (candidate.format !== 'wfs_service') {
145
271
  if (await isUrlValid(candidate.url, log)) {
146
272
  bestCandidate = candidate
147
273
  log.info(`Lien direct validé : ${candidate.url}`)
@@ -157,73 +283,29 @@ export const findBestDownloadUrl = async (metadata: any, resourceId: string, log
157
283
  return null
158
284
  }
159
285
 
160
- let { url, format, layerName } = bestCandidate
161
-
162
- if (url.includes('/api/data/') && !url.toLowerCase().includes('format=')) {
163
- const separator = url.includes('?') ? '&' : '?'
164
- url = `${url}${separator}format=csv`
165
- format = 'csv'
166
- }
286
+ let { url, format } = bestCandidate
167
287
 
168
- if (format === 'wfs_service' && !url.toLowerCase().includes('outputformat=')) {
169
- log.info(`Service WFS détecté sur ${url}, test des formats supportés...`)
170
- const [baseUrl, existingQuery] = url.split('?')
171
- const params = new URLSearchParams(existingQuery || '')
172
- const keysToDelete: string[] = []
173
- for (const key of params.keys()) {
174
- const lowerKey = key.toLowerCase()
175
- if (['service', 'request', 'version', 'typename', 'typenames', 'outputformat', 'srsname'].includes(lowerKey)) {
176
- keysToDelete.push(key)
177
- }
178
- }
179
- keysToDelete.forEach(k => params.delete(k))
180
-
181
- params.set('SERVICE', 'WFS')
182
- params.set('VERSION', '2.0.0')
183
- params.set('REQUEST', 'GetFeature')
184
- if (layerName) {
185
- params.set('TYPENAMES', layerName)
288
+ if (url.includes('/api/data/')) {
289
+ const urlObj = new URL(url)
290
+ urlObj.searchParams.delete('format')
291
+ const cleanUrl = urlObj.toString()
292
+ const detectedFormat = await detectFormatFromHeaders(cleanUrl, log)
293
+ if (detectedFormat) {
294
+ url = cleanUrl
295
+ format = detectedFormat
186
296
  } else {
187
- params.set('TYPENAMES', resourceId)
188
- }
189
-
190
- const formatsToTry = [
191
- { param: 'application/json; subtype=geojson', format: 'geojson' },
192
- { param: 'geojson', format: 'geojson' },
193
- { param: 'application/json', format: 'geojson' },
194
- { param: 'application/vnd.geo+json', format: 'geojson' },
195
- { param: 'json', format: 'geojson' },
196
- { param: 'SHAPE-ZIP', format: 'shapefile' },
197
- { param: 'shapezip', format: 'shapefile' },
198
- { param: 'application/zip', format: 'shapefile' },
199
- { param: 'application/x-shapefile', format: 'shapefile' },
200
- { param: 'csv', format: 'csv' },
201
- { param: 'text/csv', format: 'csv' },
202
- { param: 'kml', format: 'kml' },
203
- { param: 'application/vnd.google-earth.kml+xml', format: 'kml' }
204
- ]
205
-
206
- let foundUrl = null
207
- let foundFormat = null
208
-
209
- for (const f of formatsToTry) {
210
- const testParams = new URLSearchParams(params)
211
- testParams.set('OUTPUTFORMAT', f.param)
212
- const testUrl = `${baseUrl}?${testParams.toString()}`
213
- if (await isUrlValid(testUrl, log, true)) {
214
- log.info(`Format WFS supporté trouvé : ${f.param}`)
215
- foundUrl = testUrl
216
- foundFormat = f.format
217
- break // We stop at the first valid format fFound, prioritizing GeoJSON and Shapefile over others
218
- }
297
+ url = cleanUrl
298
+ format = 'shapefile'
219
299
  }
300
+ }
220
301
 
221
- if (foundUrl && foundFormat) {
222
- url = foundUrl
223
- log.info(`URL finale WFS : ${url}`)
224
- format = foundFormat
302
+ if (format === 'unknown') {
303
+ const detectedFormat = await detectFormatFromHeaders(url, log)
304
+ if (detectedFormat) {
305
+ format = detectedFormat
306
+ log.info(`Format détecté à partir des headers : ${format}`)
225
307
  } else {
226
- log.error('Ce service WFS ne propose aucun format supporté par DataFair (GeoJSON, Shapefile, KML, CSV)')
308
+ log.warning('Aucun format supporté par DataFair (GeoJSON, Shapefile, KML, CSV, XLSX, XLS, ODS, GPX, KMZ)')
227
309
  return null
228
310
  }
229
311
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@data-fair/catalog-csw",
3
3
  "description": "A simple CSW plugin for the Data Fair catalogs service.",
4
- "version": "0.1.0",
4
+ "version": "0.1.1",
5
5
  "main": "index.ts",
6
6
  "type": "module",
7
7
  "scripts": {
@@ -6,9 +6,20 @@ export const schemaExports: string[]
6
6
  * URL of the CSW service to connect to (GeoNetwork, ...)
7
7
  */
8
8
  export type CSWServiceURL = string;
9
+ /**
10
+ * A CSW catalog is primarily a data directory. It can happen that some documents do not contain a direct download link, or that these links no longer work (remote server unavailable). In these cases, the data cannot be retrieved.
11
+ *
12
+ * **Exploration by type:**
13
+ * To make your search easier, documents are automatically organized by file type. We recommend selecting the 'dataset' folder if it is available. Once inside, you can use the search bar to filter the results by your desired format (e.g., csv, pdf, etc.).
14
+ *
15
+ * **Data retrieval:**
16
+ * When a document offers multiple links, the system automatically chooses the best option for you. It ensures the link is functional and prioritizes the most usable formats for the platform (GeoJSON first, then Shapefile, CSV, etc.). Even if the source does not clearly specify the format, the system analyzes the link behind the scenes to retrieve the data in its most exploitable form.
17
+ */
18
+ export type DownloadAndFormatSelectionInfo = string;
9
19
 
10
20
  export type CSWConfig = {
11
21
  url: CSWServiceURL;
22
+ download_info?: DownloadAndFormatSelectionInfo;
12
23
  }
13
24
 
14
25
 
@@ -42,6 +42,21 @@ export const schema = {
42
42
  "type": "string",
43
43
  "format": "uri",
44
44
  "default": "https://geobretagne.fr/geonetwork/srv/fre/csw"
45
+ },
46
+ "download_info": {
47
+ "type": "string",
48
+ "readOnly": true,
49
+ "title": "Download and Format Selection Info",
50
+ "x-i18n-title": {
51
+ "fr": "À savoir : Téléchargements et Formats"
52
+ },
53
+ "description": "A CSW catalog is primarily a data directory. It can happen that some documents do not contain a direct download link, or that these links no longer work (remote server unavailable). In these cases, the data cannot be retrieved.\n\n**Exploration by type:**\nTo make your search easier, documents are automatically organized by file type. We recommend selecting the 'dataset' folder if it is available. Once inside, you can use the search bar to filter the results by your desired format (e.g., csv, pdf, etc.).\n\n**Data retrieval:**\nWhen a document offers multiple links, the system automatically chooses the best option for you. It ensures the link is functional and prioritizes the most usable formats for the platform (GeoJSON first, then Shapefile, CSV, etc.). Even if the source does not clearly specify the format, the system analyzes the link behind the scenes to retrieve the data in its most exploitable form.",
54
+ "x-i18n-description": {
55
+ "fr": "Un catalogue CSW est un annuaire de données. Il peut arriver que certains documents ne contiennent pas de lien de téléchargement direct, ou que ces liens ne fonctionnent plus (serveur distant indisponible). Dans ces cas, les données ne pourront pas être récupérées.\n\n**Exploration par type :**\nPour faciliter vos recherches, les documents sont automatiquement organisés par types de fichiers. Il est recommandé de sélectionner le dossier « dataset » s'il est disponible. Une fois à l'intérieur, vous pouvez utiliser la barre de recherche pour filtrer les résultats selon le format souhaité (par exemple : CSV, JSON, etc.).\n\n**Récupération de la donnée :**\nLorsqu'un document propose plusieurs liens, le système choisit automatiquement la meilleure option pour vous. Il s'assure que le lien est fonctionnel et privilégie les formats les plus faciles à utiliser sur la plateforme (en priorité le GeoJSON, puis le Shapefile, le CSV, etc.). Même si la source ne précise pas clairement le format, le système analyse le lien en arrière-plan pour récupérer la donnée sous sa forme la plus exploitable."
56
+ },
57
+ "layout": {
58
+ "cols": 4
59
+ }
45
60
  }
46
61
  },
47
62
  "required": [
@@ -5,7 +5,7 @@ import { fullFormats } from "ajv-formats/dist/formats.js";
5
5
  "use strict";
6
6
  export const validate = validate14;
7
7
  export default validate14;
8
- const schema16 = {"$id":"https://github.com/data-fair/catalog-csw/catalog-config","x-exports":["types","validate","schema"],"title":"CSWConfig","type":"object","additionalProperties":false,"properties":{"url":{"title":"CSW service URL","x-i18n-title":{"fr":"URL du service CSW"},"description":"URL of the CSW service to connect to (GeoNetwork, ...)","x-i18n-description":{"fr":"URL du service CSW auquel se connecter (GeoNetwork, ...)"},"type":"string","format":"uri","default":"https://geobretagne.fr/geonetwork/srv/fre/csw"}},"required":["url"]};
8
+ const schema16 = {"$id":"https://github.com/data-fair/catalog-csw/catalog-config","x-exports":["types","validate","schema"],"title":"CSWConfig","type":"object","additionalProperties":false,"properties":{"url":{"title":"CSW service URL","x-i18n-title":{"fr":"URL du service CSW"},"description":"URL of the CSW service to connect to (GeoNetwork, ...)","x-i18n-description":{"fr":"URL du service CSW auquel se connecter (GeoNetwork, ...)"},"type":"string","format":"uri","default":"https://geobretagne.fr/geonetwork/srv/fre/csw"},"download_info":{"type":"string","readOnly":true,"title":"Download and Format Selection Info","x-i18n-title":{"fr":"À savoir : Téléchargements et Formats"},"description":"A CSW catalog is primarily a data directory. It can happen that some documents do not contain a direct download link, or that these links no longer work (remote server unavailable). In these cases, the data cannot be retrieved.\n\n**Exploration by type:**\nTo make your search easier, documents are automatically organized by file type. We recommend selecting the 'dataset' folder if it is available. Once inside, you can use the search bar to filter the results by your desired format (e.g., csv, pdf, etc.).\n\n**Data retrieval:**\nWhen a document offers multiple links, the system automatically chooses the best option for you. It ensures the link is functional and prioritizes the most usable formats for the platform (GeoJSON first, then Shapefile, CSV, etc.). Even if the source does not clearly specify the format, the system analyzes the link behind the scenes to retrieve the data in its most exploitable form.","x-i18n-description":{"fr":"Un catalogue CSW est un annuaire de données. Il peut arriver que certains documents ne contiennent pas de lien de téléchargement direct, ou que ces liens ne fonctionnent plus (serveur distant indisponible). 
Dans ces cas, les données ne pourront pas être récupérées.\n\n**Exploration par type :**\nPour faciliter vos recherches, les documents sont automatiquement organisés par types de fichiers. Il est recommandé de sélectionner le dossier « dataset » s'il est disponible. Une fois à l'intérieur, vous pouvez utiliser la barre de recherche pour filtrer les résultats selon le format souhaité (par exemple : CSV, JSON, etc.).\n\n**Récupération de la donnée :**\nLorsqu'un document propose plusieurs liens, le système choisit automatiquement la meilleure option pour vous. Il s'assure que le lien est fonctionnel et privilégie les formats les plus faciles à utiliser sur la plateforme (en priorité le GeoJSON, puis le Shapefile, le CSV, etc.). Même si la source ne précise pas clairement le format, le système analyse le lien en arrière-plan pour récupérer la donnée sous sa forme la plus exploitable."},"layout":{"cols":4}}},"required":["url"]};
9
9
  const formats0 = fullFormats.uri;
10
10
 
11
11
  function validate14(data, {instancePath="", parentData, parentDataProperty, rootData=data}={}){
@@ -24,7 +24,7 @@ vErrors.push(err0);
24
24
  errors++;
25
25
  }
26
26
  for(const key0 in data){
27
- if(!(key0 === "url")){
27
+ if(!((key0 === "url") || (key0 === "download_info"))){
28
28
  const err1 = {instancePath,schemaPath:"#/additionalProperties",keyword:"additionalProperties",params:{additionalProperty: key0},message:"must NOT have additional properties"};
29
29
  if(vErrors === null){
30
30
  vErrors = [err1];
@@ -60,9 +60,9 @@ vErrors.push(err3);
60
60
  errors++;
61
61
  }
62
62
  }
63
- }
64
- else {
65
- const err4 = {instancePath,schemaPath:"#/type",keyword:"type",params:{type: "object"},message:"must be object"};
63
+ if(data.download_info !== undefined){
64
+ if(typeof data.download_info !== "string"){
65
+ const err4 = {instancePath:instancePath+"/download_info",schemaPath:"#/properties/download_info/type",keyword:"type",params:{type: "string"},message:"must be string"};
66
66
  if(vErrors === null){
67
67
  vErrors = [err4];
68
68
  }
@@ -71,6 +71,18 @@ vErrors.push(err4);
71
71
  }
72
72
  errors++;
73
73
  }
74
+ }
75
+ }
76
+ else {
77
+ const err5 = {instancePath,schemaPath:"#/type",keyword:"type",params:{type: "object"},message:"must be object"};
78
+ if(vErrors === null){
79
+ vErrors = [err5];
80
+ }
81
+ else {
82
+ vErrors.push(err5);
83
+ }
84
+ errors++;
85
+ }
74
86
  validate14.errors = vErrors;
75
87
  return errors === 0;
76
88
  }
@@ -21,6 +21,21 @@
21
21
  "type": "string",
22
22
  "format": "uri",
23
23
  "default": "https://geobretagne.fr/geonetwork/srv/fre/csw"
24
+ },
25
+ "download_info": {
26
+ "type": "string",
27
+ "readOnly": true,
28
+ "title": "Download and Format Selection Info",
29
+ "x-i18n-title": {
30
+ "fr": "À savoir : Téléchargements et Formats"
31
+ },
32
+ "description": "A CSW catalog is primarily a data directory. It can happen that some documents do not contain a direct download link, or that these links no longer work (remote server unavailable). In these cases, the data cannot be retrieved.\n\n**Exploration by type:**\nTo make your search easier, documents are automatically organized by file type. We recommend selecting the 'dataset' folder if it is available. Once inside, you can use the search bar to filter the results by your desired format (e.g., csv, pdf, etc.).\n\n**Data retrieval:**\nWhen a document offers multiple links, the system automatically chooses the best option for you. It ensures the link is functional and prioritizes the most usable formats for the platform (GeoJSON first, then Shapefile, CSV, etc.). Even if the source does not clearly specify the format, the system analyzes the link behind the scenes to retrieve the data in its most exploitable form.",
33
+ "x-i18n-description": {
34
+ "fr": "Un catalogue CSW est un annuaire de données. Il peut arriver que certains documents ne contiennent pas de lien de téléchargement direct, ou que ces liens ne fonctionnent plus (serveur distant indisponible). Dans ces cas, les données ne pourront pas être récupérées.\n\n**Exploration par type :**\nPour faciliter vos recherches, les documents sont automatiquement organisés par types de fichiers. Il est recommandé de sélectionner le dossier « dataset » s'il est disponible. Une fois à l'intérieur, vous pouvez utiliser la barre de recherche pour filtrer les résultats selon le format souhaité (par exemple : CSV, JSON, etc.).\n\n**Récupération de la donnée :**\nLorsqu'un document propose plusieurs liens, le système choisit automatiquement la meilleure option pour vous. Il s'assure que le lien est fonctionnel et privilégie les formats les plus faciles à utiliser sur la plateforme (en priorité le GeoJSON, puis le Shapefile, le CSV, etc.). Même si la source ne précise pas clairement le format, le système analyse le lien en arrière-plan pour récupérer la donnée sous sa forme la plus exploitable."
35
+ },
36
+ "layout": {
37
+ "cols": 4
38
+ }
24
39
  }
25
40
  },
26
41
  "required": ["url"]