@data-fair/catalog-csw 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +3 -4
- package/lib/capabilities.ts +1 -0
- package/lib/list.ts +72 -45
- package/lib/prepare.ts +42 -0
- package/lib/resource/logo.svg +1 -0
- package/lib/utils/link-selection.ts +150 -68
- package/package.json +1 -1
- package/types/catalogConfig/.type/index.d.ts +11 -0
- package/types/catalogConfig/.type/index.js +15 -0
- package/types/catalogConfig/.type/validate.js +17 -5
- package/types/catalogConfig/schema.json +15 -0
package/index.ts
CHANGED
|
@@ -4,10 +4,8 @@ import { type CSWCapabilities, capabilities } from './lib/capabilities.ts'
|
|
|
4
4
|
|
|
5
5
|
const plugin: CatalogPlugin<CSWConfig, CSWCapabilities> = {
|
|
6
6
|
async prepare (context) {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
}
|
|
10
|
-
return context
|
|
7
|
+
const prepare = (await import('./lib/prepare.ts')).default
|
|
8
|
+
return prepare(context)
|
|
11
9
|
},
|
|
12
10
|
async list (context) {
|
|
13
11
|
const { list } = await import('./lib/list.ts')
|
|
@@ -21,6 +19,7 @@ const plugin: CatalogPlugin<CSWConfig, CSWCapabilities> = {
|
|
|
21
19
|
|
|
22
20
|
metadata: {
|
|
23
21
|
title: 'CSW',
|
|
22
|
+
thumbnailPath: './lib/resource/logo.svg',
|
|
24
23
|
i18n: {
|
|
25
24
|
en: { description: 'Uses CSW 2.0.2 to import datasets (GeoNetwork, ...)' },
|
|
26
25
|
fr: { description: 'Utilise du CSW 2.0.2 pour importer des datasets (GeoNetwork, ...)' }
|
package/lib/capabilities.ts
CHANGED
package/lib/list.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { CatalogPlugin, ListContext } from '@data-fair/types-catalogs'
|
|
1
|
+
import type { CatalogPlugin, ListContext, Folder } from '@data-fair/types-catalogs'
|
|
2
2
|
import type { CSWConfig } from '#types'
|
|
3
3
|
import type { CswRecord } from './utils/types.ts'
|
|
4
4
|
import { XMLParser } from 'fast-xml-parser'
|
|
@@ -21,58 +21,84 @@ const parser = new XMLParser({
|
|
|
21
21
|
*/
|
|
22
22
|
export const list = async (config: ListContext<CSWConfig, typeof capabilities>): ReturnType<CatalogPlugin<CSWConfig>['list']> => {
|
|
23
23
|
const { catalogConfig, params } = config
|
|
24
|
+
const currentFolderId = params?.currentFolderId
|
|
25
|
+
|
|
26
|
+
if (!currentFolderId) {
|
|
27
|
+
const cswBody = `
|
|
28
|
+
<csw:GetRecords
|
|
29
|
+
xmlns:csw="http://www.opengis.net/cat/csw/2.0.2"
|
|
30
|
+
service="CSW" version="2.0.2" resultType="results"
|
|
31
|
+
startPosition="1" maxRecords="100"
|
|
32
|
+
outputSchema="http://www.opengis.net/cat/csw/2.0.2">
|
|
33
|
+
<csw:Query typeNames="csw:Record">
|
|
34
|
+
<csw:ElementSetName>brief</csw:ElementSetName>
|
|
35
|
+
</csw:Query>
|
|
36
|
+
</csw:GetRecords>`
|
|
37
|
+
|
|
38
|
+
try {
|
|
39
|
+
const response = await axios.post(catalogConfig.url, cswBody, {
|
|
40
|
+
headers: { 'Content-Type': 'application/xml' }
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
const parsed = parser.parse(response.data)
|
|
44
|
+
const root = parsed.GetRecordsResponse || parsed['csw:GetRecordsResponse']
|
|
45
|
+
const searchResults = root?.SearchResults || root?.['csw:SearchResults']
|
|
46
|
+
const rawRecords = searchResults?.BriefRecord || searchResults?.SummaryRecord || searchResults?.Record || []
|
|
47
|
+
const records = asArray(rawRecords)
|
|
48
|
+
|
|
49
|
+
const typesSet = new Set<string>()
|
|
50
|
+
records.forEach((record: any) => {
|
|
51
|
+
const typeStr = getText(record.type || record['dc:type'])
|
|
52
|
+
if (typeStr && typeStr !== 'unknown') {
|
|
53
|
+
typesSet.add(typeStr.toLowerCase())
|
|
54
|
+
}
|
|
55
|
+
})
|
|
56
|
+
const folders = Array.from(typesSet).map(type => ({
|
|
57
|
+
id: type,
|
|
58
|
+
title: type.toUpperCase(),
|
|
59
|
+
type: 'folder'
|
|
60
|
+
} as Folder))
|
|
61
|
+
|
|
62
|
+
if (folders.length === 0) {
|
|
63
|
+
folders.push({ id: 'all', title: 'TOUS LES DOCUMENTS', type: 'folder' as const })
|
|
64
|
+
}
|
|
65
|
+
return {
|
|
66
|
+
count: folders.length,
|
|
67
|
+
results: folders,
|
|
68
|
+
path: []
|
|
69
|
+
}
|
|
70
|
+
} catch (error: any) {
|
|
71
|
+
console.error('Erreur GetDomain:', error.message)
|
|
72
|
+
return { count: 0, results: [], path: [] }
|
|
73
|
+
}
|
|
74
|
+
}
|
|
24
75
|
const query = params?.q ? params.q.trim() : ''
|
|
25
76
|
const page = Number(params?.page || 1)
|
|
26
77
|
const size = Number(params?.size || 10)
|
|
27
78
|
const startPosition = (page - 1) * size + 1
|
|
28
79
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
</ogc:PropertyIsLike>
|
|
36
|
-
|
|
37
|
-
<ogc:PropertyIsLike matchCase="false" wildCard="%" singleChar="_" escapeChar="\\">
|
|
38
|
-
<ogc:PropertyName>AnyText</ogc:PropertyName>
|
|
39
|
-
<ogc:Literal>%JSON%</ogc:Literal>
|
|
40
|
-
</ogc:PropertyIsLike>
|
|
41
|
-
|
|
42
|
-
<ogc:PropertyIsLike matchCase="false" wildCard="%" singleChar="_" escapeChar="\\">
|
|
43
|
-
<ogc:PropertyName>AnyText</ogc:PropertyName>
|
|
44
|
-
<ogc:Literal>%CSV%</ogc:Literal>
|
|
45
|
-
</ogc:PropertyIsLike>
|
|
46
|
-
|
|
47
|
-
<ogc:PropertyIsLike matchCase="false" wildCard="%" singleChar="_" escapeChar="\\">
|
|
48
|
-
<ogc:PropertyName>AnyText</ogc:PropertyName>
|
|
49
|
-
<ogc:Literal>%ZIP%</ogc:Literal>
|
|
50
|
-
</ogc:PropertyIsLike>
|
|
51
|
-
</ogc:Or>`
|
|
52
|
-
|
|
53
|
-
const filterBlock = query
|
|
80
|
+
const typeFilter = `
|
|
81
|
+
<ogc:PropertyIsEqualTo>
|
|
82
|
+
<ogc:PropertyName>dc:type</ogc:PropertyName> <ogc:Literal>${currentFolderId}</ogc:Literal>
|
|
83
|
+
</ogc:PropertyIsEqualTo>`
|
|
84
|
+
|
|
85
|
+
const filterContent = query
|
|
54
86
|
? `
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
<ogc:
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
:
|
|
87
|
+
<ogc:And>
|
|
88
|
+
${typeFilter}
|
|
89
|
+
<ogc:PropertyIsLike wildCard="%" singleChar="_" escapeChar="\\\\">
|
|
90
|
+
<ogc:PropertyName>AnyText</ogc:PropertyName>
|
|
91
|
+
<ogc:Literal>%${query}%</ogc:Literal>
|
|
92
|
+
</ogc:PropertyIsLike>
|
|
93
|
+
</ogc:And>`
|
|
94
|
+
: typeFilter
|
|
63
95
|
|
|
64
96
|
const constraintBlock = `
|
|
65
97
|
<csw:Constraint version="1.1.0">
|
|
66
98
|
<ogc:Filter>
|
|
67
|
-
${
|
|
99
|
+
${filterContent}
|
|
68
100
|
</ogc:Filter>
|
|
69
|
-
</csw:Constraint
|
|
70
|
-
<ogc:SortBy xmlns:ogc="http://www.opengis.net/ogc">
|
|
71
|
-
<ogc:SortProperty>
|
|
72
|
-
<ogc:PropertyName>RevisionDate</ogc:PropertyName>
|
|
73
|
-
<ogc:SortOrder>DESC</ogc:SortOrder>
|
|
74
|
-
</ogc:SortProperty>
|
|
75
|
-
</ogc:SortBy>`
|
|
101
|
+
</csw:Constraint>`
|
|
76
102
|
|
|
77
103
|
const cswBody = `
|
|
78
104
|
<csw:GetRecords
|
|
@@ -119,20 +145,21 @@ export const list = async (config: ListContext<CSWConfig, typeof capabilities>):
|
|
|
119
145
|
const titleRecord = getText(record.title || record['dc:title']) || 'Sans titre'
|
|
120
146
|
const rawDateObj = record.modified || record.date || record.dateStamp || record.RevisionDate
|
|
121
147
|
const dateRaw = getText(rawDateObj)
|
|
122
|
-
const type = getText(record.type || record['dc:type'])
|
|
123
148
|
return {
|
|
124
149
|
id: identifier,
|
|
125
150
|
title: titleRecord,
|
|
126
151
|
updatedAt: dateRaw || new Date().toISOString(),
|
|
127
152
|
type: 'resource',
|
|
128
|
-
format:
|
|
153
|
+
format: currentFolderId
|
|
129
154
|
}
|
|
130
155
|
}) as ResourceList
|
|
131
156
|
|
|
132
157
|
return {
|
|
133
158
|
count: totalCount,
|
|
134
159
|
results: listResults,
|
|
135
|
-
path: [
|
|
160
|
+
path: [
|
|
161
|
+
{ id: currentFolderId, title: currentFolderId.toUpperCase(), type: 'folder' }
|
|
162
|
+
]
|
|
136
163
|
}
|
|
137
164
|
} catch (error: any) {
|
|
138
165
|
console.error('ERREUR :', error.message)
|
package/lib/prepare.ts
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import dns from 'dns/promises'
|
|
2
|
+
import { URL } from 'url'
|
|
3
|
+
import type { CSWConfig } from '#types'
|
|
4
|
+
import type { CSWCapabilities } from './capabilities.ts'
|
|
5
|
+
import type { PrepareContext } from '@data-fair/types-catalogs'
|
|
6
|
+
|
|
7
|
+
/**
 * IPv4 ranges that must never be reached through a user-supplied catalog URL:
 * loopback, RFC 1918 private networks, link-local and the "this network" block.
 */
const INTERNAL_IPV4_PATTERNS: readonly RegExp[] = [
  /^127\./,
  /^10\./,
  /^192\.168\./,
  /^169\.254\./,
  /^0\./,
  /^172\.(1[6-9]|2\d|3[0-1])\./
]

/** Tells whether a dotted-quad IPv4 address falls in one of the internal ranges above. */
const isInternalIpv4 = (address: string): boolean =>
  INTERNAL_IPV4_PATTERNS.some(pattern => pattern.test(address))

/**
 * Validates and normalizes the CSW catalog URL of a catalog configuration.
 *
 * Checks are performed in this order: the URL is present, it parses, its
 * scheme is http or https, its hostname resolves to IPv4, and none of the
 * resolved addresses belongs to an internal range.
 *
 * @param context - Prepare context; only `catalogConfig` is read.
 * @returns An object whose `catalogConfig` is a copy of the input with `url`
 *   replaced by its trimmed, serialized form. The input object is left untouched.
 * @throws Error with a user-facing (French) message when any check fails.
 */
const prepare = async ({ catalogConfig }: PrepareContext<CSWConfig, CSWCapabilities>) => {
  if (!catalogConfig || !catalogConfig.url) {
    throw new Error("L'URL du catalogue est obligatoire.")
  }

  let url: URL
  try {
    url = new URL(catalogConfig.url.trim())
  } catch {
    throw new Error("L'URL fournie n'est pas valide.")
  }

  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    throw new Error('Seuls les protocoles HTTP et HTTPS sont autorisés.')
  }

  // Only the DNS lookup lives in the try block, so a resolution failure can no
  // longer be confused with the internal-network rejection below.
  let addresses: string[]
  try {
    // `all: true` returns every A record: a hostname may resolve to several
    // addresses and a single internal one is enough to refuse the URL.
    const resolved = await dns.lookup(url.hostname, { family: 4, all: true })
    addresses = resolved.map(entry => entry.address)
  } catch {
    throw new Error(`Impossible de résoudre l'adresse : ${url.hostname}`)
  }
  if (addresses.length === 0) {
    throw new Error(`Impossible de résoudre l'adresse : ${url.hostname}`)
  }

  const internalAddress = addresses.find(isInternalIpv4)
  if (internalAddress !== undefined) {
    throw new Error(`L'URL est interdite car elle pointe vers un réseau interne (${internalAddress}).`)
  }

  return {
    catalogConfig: { ...catalogConfig, url: url.toString() }
  }
}

export default prepare
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" width="24" height="24" fill="#000000" style="opacity:1;"><path d="M49.953 5A45 45 0 0 0 7.758 34.498H2.5A3 3 0 0 0-.502 37.5v25A3 3 0 0 0 2.5 65.502h5.256A45 45 0 0 0 50 95a45 45 0 0 0 42.242-29.498H97.5a3 3 0 0 0 3.002-3.002v-25a3 3 0 0 0-3.002-3.002h-5.256A45 45 0 0 0 50 5zm2.297 5.113c4.74.807 9.271 4.713 12.84 11.194c1.15 2.089 2.18 4.433 3.068 6.974H52.25zm-4.5.178v17.99H32.676c.889-2.541 1.916-4.885 3.066-6.974c3.36-6.1 7.571-9.915 12.008-11.016M37.383 11.51c-2.092 2.116-3.971 4.698-5.584 7.627c-1.512 2.745-2.813 5.819-3.881 9.144h-12.11A40.52 40.52 0 0 1 37.384 11.51m26.469.416c8.457 3.07 15.586 8.88 20.34 16.355H72.91c-1.066-3.326-2.365-6.4-3.877-9.144c-1.509-2.74-3.251-5.174-5.181-7.211M13.33 32.78h13.328c-.135.561-.246 1.143-.367 1.717h-13.71a39 39 0 0 1 .75-1.717m17.977 0H47.75v1.717H30.908c.131-.574.253-1.158.399-1.717m20.943 0h17.275c.146.56.268 1.143.399 1.717H52.25zm21.92 0h12.5c.265.565.512 1.138.75 1.717H74.535c-.12-.574-.23-1.156-.365-1.717m-46.861 9.065q3.188 0 5.007 1.166q1.83 1.166 2.717 3.584l-4.605.925q-.242-.697-.506-1.019a2.94 2.94 0 0 0-1.07-.834a3.35 3.35 0 0 0-1.416-.291q-1.773 0-2.717 1.291q-.714.958-.715 3.01q.001 2.541.853 3.49q.853.937 2.395.937q1.497.001 2.256-.76q.771-.76 1.117-2.208l4.568 1.25q-.46 1.74-1.449 2.906a6.2 6.2 0 0 1-2.465 1.76q-1.46.594-3.728.593q-2.752 0-4.502-.718q-1.738-.729-3.004-2.551t-1.266-4.668q0-3.791 2.221-5.822q2.233-2.041 6.309-2.041m19.523 0q3.236 0 4.928 1.093q1.703 1.084 2.025 3.457l-4.914.262q-.196-1.03-.83-1.5q-.621-.468-1.727-.469q-.909 0-1.369.354q-.46.344-.46.844q0 .365.38.656q.368.302 1.748.562q3.42.667 4.893 1.354q1.486.677 2.154 1.687q.679 1.012.678 2.262q0 1.468-.897 2.707q-.897 1.24-2.51 1.887q-1.612.635-4.064.635q-4.305 0-5.963-1.5q-1.657-1.5-1.877-3.813l4.963-.281q.162 1.094.656 1.666q.806.928 2.301.928q1.118 0 
1.717-.469q.61-.48.61-1.104q0-.593-.577-1.062q-.575-.47-2.67-.887q-3.43-.697-4.892-1.853q-1.475-1.156-1.475-2.948q0-1.177.75-2.218q.76-1.052 2.268-1.647q1.519-.604 4.154-.603m10.604.26h4.949l1.785 8.53l2.601-8.53h4.94l2.611 8.53l1.785-8.53h4.928l-3.719 15.271h-5.11l-2.96-9.615l-2.947 9.615h-5.112zM12.58 65.501h13.524c.112.573.214 1.154.341 1.715H13.33a39 39 0 0 1-.75-1.715m18.129 0H47.75v1.715H31.082c-.137-.56-.25-1.142-.373-1.715m21.541 0h17.873c-.123.573-.236 1.155-.373 1.715h-17.5zm22.473 0H87.42a39 39 0 0 1-.75 1.715H74.38c.128-.561.23-1.142.343-1.715m-58.914 6.215h11.824c1.117 3.675 2.518 7.056 4.166 10.049c1.294 2.35 2.762 4.472 4.369 6.316c-8.466-3.07-15.603-8.884-20.36-16.365m16.554 0H47.75v18.719c-.277-.016-.55-.044-.826-.065c-4.132-1.35-8.032-5.057-11.182-10.777c-1.285-2.335-2.424-4.984-3.379-7.877m19.887 0h16.219c-.955 2.893-2.094 5.542-3.38 7.877c-3.084 5.602-6.888 9.278-10.925 10.695q-.951.096-1.914.147zm20.947 0h10.994a40.56 40.56 0 0 1-19.105 15.877c1.443-1.728 2.766-3.684 3.947-5.828c1.648-2.993 3.049-6.373 4.164-10.05" color="currentColor"/></svg>
|
|
@@ -15,10 +15,12 @@ export const isUrlValid = async (url: string, log: any, isWFSTest = false): Prom
|
|
|
15
15
|
})
|
|
16
16
|
|
|
17
17
|
if (response.status >= 400) {
|
|
18
|
+
log.warning(`URL non valide : ${url}`)
|
|
18
19
|
return false
|
|
19
20
|
}
|
|
20
21
|
const content = String(response.data)
|
|
21
22
|
if (content.includes('ExceptionReport') || content.includes('ServiceException')) {
|
|
23
|
+
log.warning(`URL non valide : ${url}`)
|
|
22
24
|
return false
|
|
23
25
|
}
|
|
24
26
|
// if the requested format is JSON but the response is XML, it's likely that the WFS doesn't support the requested format
|
|
@@ -34,10 +36,120 @@ export const isUrlValid = async (url: string, log: any, isWFSTest = false): Prom
|
|
|
34
36
|
return true
|
|
35
37
|
}
|
|
36
38
|
} catch (err) {
|
|
39
|
+
log.warning(`URL non valide : ${url}`)
|
|
37
40
|
return false
|
|
38
41
|
}
|
|
39
42
|
}
|
|
40
43
|
|
|
44
|
+
/**
 * Probes a WFS endpoint for an output format the platform can ingest.
 *
 * A GetFeature URL is rebuilt from `originalUrl`: any existing WFS parameters
 * are dropped (whatever their casing), then SERVICE / VERSION / REQUEST /
 * TYPENAMES are set. Each candidate OUTPUTFORMAT value is then tried in turn
 * through `isUrlValid` (called with its WFS-test flag set), and the first one
 * that validates wins.
 *
 * @param originalUrl - WFS service URL found in the metadata record.
 * @param resourceId - Used as the layer name when `layerName` is empty.
 * @param layerName - Layer (type name) to request, if known.
 * @param log - Logger exposing `info` and `error`.
 * @returns The working URL and its normalized format, or `null` when no
 *   candidate output format validates.
 */
const negotiateWfsFormat = async (
  originalUrl: string,
  resourceId: string,
  layerName: string | null | undefined,
  log: any
): Promise<{ url: string; format: string } | null> => {
  log.info(`Service WFS détecté sur ${originalUrl}, test des formats supportés...`)

  const [baseUrl, existingQuery] = originalUrl.split('?')
  const params = new URLSearchParams(existingQuery || '')

  // Collect first, delete afterwards: the key list is not modified while it is being read.
  const reservedKeys = new Set(['service', 'request', 'version', 'typename', 'typenames', 'outputformat', 'srsname'])
  const staleKeys = Array.from(params.keys()).filter(key => reservedKeys.has(key.toLowerCase()))
  for (const key of staleKeys) {
    params.delete(key)
  }

  const baseEntries: Array<[string, string]> = [
    ['SERVICE', 'WFS'],
    ['VERSION', '2.0.0'],
    ['REQUEST', 'GetFeature'],
    ['TYPENAMES', layerName || resourceId]
  ]
  for (const [key, value] of baseEntries) {
    params.set(key, value)
  }

  // Ordered by preference: every GeoJSON spelling first, then Shapefile, CSV and KML.
  const outputFormatsByTarget: Array<[string, string[]]> = [
    ['geojson', ['application/json; subtype=geojson', 'geojson', 'application/json', 'application/vnd.geo+json', 'json']],
    ['shapefile', ['SHAPE-ZIP', 'shapezip', 'application/zip', 'application/x-shapefile']],
    ['csv', ['csv', 'text/csv']],
    ['kml', ['kml', 'application/vnd.google-earth.kml+xml']]
  ]

  for (const [format, outputFormats] of outputFormatsByTarget) {
    for (const outputFormat of outputFormats) {
      const probeParams = new URLSearchParams(params)
      probeParams.set('OUTPUTFORMAT', outputFormat)
      const testUrl = `${baseUrl}?${probeParams.toString()}`

      // Probes run one at a time so the first (most preferred) valid format is returned.
      if (!(await isUrlValid(testUrl, log, true))) continue

      log.info(`Format WFS supporté trouvé : ${outputFormat}. URL finale WFS : ${testUrl}`)
      return { url: testUrl, format }
    }
  }

  log.error('Ce service WFS ne propose aucun format supporté par DataFair (GeoJSON, Shapefile, KML, CSV)')
  return null
}
|
|
101
|
+
|
|
102
|
+
/**
 * Ordered table mapping Content-Type fragments to the format name returned to
 * callers. Order matters: the first row with a matching fragment wins, so the
 * specific media types are listed before the generic `kml` / `xml` fallback.
 * That fallback would otherwise capture the XLSX and GPX media types, which
 * both contain the substring "xml".
 */
const CONTENT_TYPE_FORMATS: ReadonlyArray<readonly [readonly string[], string]> = [
  [['application/json', 'application/geo+json'], 'geojson'],
  [['application/zip', 'application/x-zip-compressed'], 'shapefile'],
  [['text/csv', 'application/csv'], 'csv'],
  [['text/tab-separated-values', 'text/tsv'], 'tsv'],
  [['application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'], 'xlsx'],
  [['application/vnd.ms-excel'], 'xls'],
  [['application/vnd.oasis.opendocument.spreadsheet'], 'ods'],
  [['application/gpx+xml', 'gpx'], 'gpx'],
  [['application/vnd.google-earth.kmz', 'kmz'], 'kmz'],
  // Generic fallback, kept last on purpose.
  [['kml', 'xml'], 'kml']
]

/**
 * Maps a Content-Type header value to a format name.
 *
 * @param contentType - Raw header value (may carry parameters such as charset).
 * @returns The matching format name, or `null` when nothing matches.
 */
const formatFromContentType = (contentType: string): string | null => {
  // Media types are case-insensitive, so compare on a lowercased copy.
  const normalized = contentType.toLowerCase()
  for (const [fragments, format] of CONTENT_TYPE_FORMATS) {
    if (fragments.some(fragment => normalized.includes(fragment))) return format
  }
  return null
}

/**
 * Guesses the format of a remote resource from the Content-Type header of a
 * HEAD request.
 *
 * @param url - Resource URL to probe.
 * @param log - Logger exposing `warning`.
 * @returns The detected format name, or `null` when the header is missing,
 *   matches nothing, or the request fails (a warning is logged in that case).
 */
const detectFormatFromHeaders = async (url: string, log: any): Promise<string | null> => {
  try {
    const response = await axios.head(url, {
      timeout: 5000,
      validateStatus: (status: number) => status < 400
    })
    const contentType = response.headers['content-type']
    if (typeof contentType !== 'string' || contentType === '') return null
    return formatFromContentType(contentType)
  } catch {
    log.warning(`Impossible de détecter le format pour ${url} (Erreur HTTP).`)
    return null
  }
}
|
|
152
|
+
|
|
41
153
|
const analyzeLink = (linkWrapper: any): DownloadCandidate | null => {
|
|
42
154
|
const link = linkWrapper.CI_OnlineResource || linkWrapper
|
|
43
155
|
const url = getText(link.linkage?.URL)
|
|
@@ -59,14 +171,18 @@ const analyzeLink = (linkWrapper: any): DownloadCandidate | null => {
|
|
|
59
171
|
return { url, format, score }
|
|
60
172
|
}
|
|
61
173
|
|
|
174
|
+
if (u.includes('/api/data/') || u.includes('/api/records/')) {
|
|
175
|
+
return { url, format: 'shapefile', score: 11 }
|
|
176
|
+
}
|
|
177
|
+
|
|
62
178
|
// 2. Shapefile (Zip)
|
|
63
179
|
if (u.includes('shape-zip') || u.endsWith('.zip') || n.includes('shapefile')) {
|
|
64
|
-
return { url, format: 'shapefile', score:
|
|
180
|
+
return { url, format: 'shapefile', score: 8 }
|
|
65
181
|
}
|
|
66
182
|
|
|
67
183
|
// 3. GeoJSON
|
|
68
184
|
if (u.includes('geojson') || p.includes('geo+json') || n.includes('geojson')) {
|
|
69
|
-
return { url, format: 'geojson', score:
|
|
185
|
+
return { url, format: 'geojson', score: 10 }
|
|
70
186
|
}
|
|
71
187
|
|
|
72
188
|
// 4. CSV
|
|
@@ -89,7 +205,7 @@ const analyzeLink = (linkWrapper: any): DownloadCandidate | null => {
|
|
|
89
205
|
return { url, format: 'wfs_service', score: 2, layerName: name }
|
|
90
206
|
}
|
|
91
207
|
|
|
92
|
-
return
|
|
208
|
+
return { url, format: 'unknown', score: 1 }
|
|
93
209
|
}
|
|
94
210
|
|
|
95
211
|
/**
|
|
@@ -138,10 +254,20 @@ export const findBestDownloadUrl = async (metadata: any, resourceId: string, log
|
|
|
138
254
|
log.info(`${candidates.length} liens candidats trouvés. Vérification...`)
|
|
139
255
|
|
|
140
256
|
let bestCandidate: DownloadCandidate | null = null
|
|
141
|
-
|
|
142
257
|
for (const candidate of candidates) {
|
|
143
|
-
|
|
144
|
-
|
|
258
|
+
if (candidate.format === 'wfs_service') {
|
|
259
|
+
log.info('lien WFS detecté, validation en cours...')
|
|
260
|
+
if (await isUrlValid(candidate.url, log)) {
|
|
261
|
+
bestCandidate = candidate
|
|
262
|
+
log.info(`Lien WFS validé : ${candidate.url}`)
|
|
263
|
+
break
|
|
264
|
+
} else {
|
|
265
|
+
const wfsResult = await negotiateWfsFormat(candidate.url, resourceId, candidate.layerName, log)
|
|
266
|
+
if (wfsResult) {
|
|
267
|
+
return wfsResult
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
} else if (candidate.format !== 'wfs_service') {
|
|
145
271
|
if (await isUrlValid(candidate.url, log)) {
|
|
146
272
|
bestCandidate = candidate
|
|
147
273
|
log.info(`Lien direct validé : ${candidate.url}`)
|
|
@@ -157,73 +283,29 @@ export const findBestDownloadUrl = async (metadata: any, resourceId: string, log
|
|
|
157
283
|
return null
|
|
158
284
|
}
|
|
159
285
|
|
|
160
|
-
let { url, format
|
|
161
|
-
|
|
162
|
-
if (url.includes('/api/data/') && !url.toLowerCase().includes('format=')) {
|
|
163
|
-
const separator = url.includes('?') ? '&' : '?'
|
|
164
|
-
url = `${url}${separator}format=csv`
|
|
165
|
-
format = 'csv'
|
|
166
|
-
}
|
|
286
|
+
let { url, format } = bestCandidate
|
|
167
287
|
|
|
168
|
-
if (
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
const
|
|
172
|
-
const
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
keysToDelete.push(key)
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
keysToDelete.forEach(k => params.delete(k))
|
|
180
|
-
|
|
181
|
-
params.set('SERVICE', 'WFS')
|
|
182
|
-
params.set('VERSION', '2.0.0')
|
|
183
|
-
params.set('REQUEST', 'GetFeature')
|
|
184
|
-
if (layerName) {
|
|
185
|
-
params.set('TYPENAMES', layerName)
|
|
288
|
+
if (url.includes('/api/data/')) {
|
|
289
|
+
const urlObj = new URL(url)
|
|
290
|
+
urlObj.searchParams.delete('format')
|
|
291
|
+
const cleanUrl = urlObj.toString()
|
|
292
|
+
const detectedFormat = await detectFormatFromHeaders(cleanUrl, log)
|
|
293
|
+
if (detectedFormat) {
|
|
294
|
+
url = cleanUrl
|
|
295
|
+
format = detectedFormat
|
|
186
296
|
} else {
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
const formatsToTry = [
|
|
191
|
-
{ param: 'application/json; subtype=geojson', format: 'geojson' },
|
|
192
|
-
{ param: 'geojson', format: 'geojson' },
|
|
193
|
-
{ param: 'application/json', format: 'geojson' },
|
|
194
|
-
{ param: 'application/vnd.geo+json', format: 'geojson' },
|
|
195
|
-
{ param: 'json', format: 'geojson' },
|
|
196
|
-
{ param: 'SHAPE-ZIP', format: 'shapefile' },
|
|
197
|
-
{ param: 'shapezip', format: 'shapefile' },
|
|
198
|
-
{ param: 'application/zip', format: 'shapefile' },
|
|
199
|
-
{ param: 'application/x-shapefile', format: 'shapefile' },
|
|
200
|
-
{ param: 'csv', format: 'csv' },
|
|
201
|
-
{ param: 'text/csv', format: 'csv' },
|
|
202
|
-
{ param: 'kml', format: 'kml' },
|
|
203
|
-
{ param: 'application/vnd.google-earth.kml+xml', format: 'kml' }
|
|
204
|
-
]
|
|
205
|
-
|
|
206
|
-
let foundUrl = null
|
|
207
|
-
let foundFormat = null
|
|
208
|
-
|
|
209
|
-
for (const f of formatsToTry) {
|
|
210
|
-
const testParams = new URLSearchParams(params)
|
|
211
|
-
testParams.set('OUTPUTFORMAT', f.param)
|
|
212
|
-
const testUrl = `${baseUrl}?${testParams.toString()}`
|
|
213
|
-
if (await isUrlValid(testUrl, log, true)) {
|
|
214
|
-
log.info(`Format WFS supporté trouvé : ${f.param}`)
|
|
215
|
-
foundUrl = testUrl
|
|
216
|
-
foundFormat = f.format
|
|
217
|
-
break // We stop at the first valid format fFound, prioritizing GeoJSON and Shapefile over others
|
|
218
|
-
}
|
|
297
|
+
url = cleanUrl
|
|
298
|
+
format = 'shapefile'
|
|
219
299
|
}
|
|
300
|
+
}
|
|
220
301
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
format =
|
|
302
|
+
if (format === 'unknown') {
|
|
303
|
+
const detectedFormat = await detectFormatFromHeaders(url, log)
|
|
304
|
+
if (detectedFormat) {
|
|
305
|
+
format = detectedFormat
|
|
306
|
+
log.info(`Format détecté à partir des headers : ${format}`)
|
|
225
307
|
} else {
|
|
226
|
-
log.
|
|
308
|
+
log.warning('Aucun format supporté par DataFair (GeoJSON, Shapefile, KML, CSV, XLSX, XLS, ODS, GPX, KMZ)')
|
|
227
309
|
return null
|
|
228
310
|
}
|
|
229
311
|
}
|
package/package.json
CHANGED
|
@@ -6,9 +6,20 @@ export const schemaExports: string[]
|
|
|
6
6
|
* URL of the CSW service to connect to (GeoNetwork, ...)
|
|
7
7
|
*/
|
|
8
8
|
export type CSWServiceURL = string;
|
|
9
|
+
/**
|
|
10
|
+
* A CSW catalog is primarily a data directory. It can happen that some documents do not contain a direct download link, or that these links no longer work (remote server unavailable). In these cases, the data cannot be retrieved.
|
|
11
|
+
*
|
|
12
|
+
* **Exploration by type:**
|
|
13
|
+
* To make your search easier, documents are automatically organized by file type. We recommend selecting the 'dataset' folder if it is available. Once inside, you can use the search bar to filter the results by your desired format (e.g., csv, pdf, etc.).
|
|
14
|
+
*
|
|
15
|
+
* **Data retrieval:**
|
|
16
|
+
* When a document offers multiple links, the system automatically chooses the best option for you. It ensures the link is functional and prioritizes the most usable formats for the platform (GeoJSON first, then Shapefile, CSV, etc.). Even if the source does not clearly specify the format, the system analyzes the link behind the scenes to retrieve the data in its most exploitable form.
|
|
17
|
+
*/
|
|
18
|
+
export type DownloadAndFormatSelectionInfo = string;
|
|
9
19
|
|
|
10
20
|
export type CSWConfig = {
|
|
11
21
|
url: CSWServiceURL;
|
|
22
|
+
download_info?: DownloadAndFormatSelectionInfo;
|
|
12
23
|
}
|
|
13
24
|
|
|
14
25
|
|
|
@@ -42,6 +42,21 @@ export const schema = {
|
|
|
42
42
|
"type": "string",
|
|
43
43
|
"format": "uri",
|
|
44
44
|
"default": "https://geobretagne.fr/geonetwork/srv/fre/csw"
|
|
45
|
+
},
|
|
46
|
+
"download_info": {
|
|
47
|
+
"type": "string",
|
|
48
|
+
"readOnly": true,
|
|
49
|
+
"title": "Download and Format Selection Info",
|
|
50
|
+
"x-i18n-title": {
|
|
51
|
+
"fr": "À savoir : Téléchargements et Formats"
|
|
52
|
+
},
|
|
53
|
+
"description": "A CSW catalog is primarily a data directory. It can happen that some documents do not contain a direct download link, or that these links no longer work (remote server unavailable). In these cases, the data cannot be retrieved.\n\n**Exploration by type:**\nTo make your search easier, documents are automatically organized by file type. We recommend selecting the 'dataset' folder if it is available. Once inside, you can use the search bar to filter the results by your desired format (e.g., csv, pdf, etc.).\n\n**Data retrieval:**\nWhen a document offers multiple links, the system automatically chooses the best option for you. It ensures the link is functional and prioritizes the most usable formats for the platform (GeoJSON first, then Shapefile, CSV, etc.). Even if the source does not clearly specify the format, the system analyzes the link behind the scenes to retrieve the data in its most exploitable form.",
|
|
54
|
+
"x-i18n-description": {
|
|
55
|
+
"fr": "Un catalogue CSW est un annuaire de données. Il peut arriver que certains documents ne contiennent pas de lien de téléchargement direct, ou que ces liens ne fonctionnent plus (serveur distant indisponible). Dans ces cas, les données ne pourront pas être récupérées.\n\n**Exploration par type :**\nPour faciliter vos recherches, les documents sont automatiquement organisés par types de fichiers. Il est recommandé de sélectionner le dossier « dataset » s'il est disponible. Une fois à l'intérieur, vous pouvez utiliser la barre de recherche pour filtrer les résultats selon le format souhaité (par exemple : CSV, JSON, etc.).\n\n**Récupération de la donnée :**\nLorsqu'un document propose plusieurs liens, le système choisit automatiquement la meilleure option pour vous. Il s'assure que le lien est fonctionnel et privilégie les formats les plus faciles à utiliser sur la plateforme (en priorité le GeoJSON, puis le Shapefile, le CSV, etc.). Même si la source ne précise pas clairement le format, le système analyse le lien en arrière-plan pour récupérer la donnée sous sa forme la plus exploitable."
|
|
56
|
+
},
|
|
57
|
+
"layout": {
|
|
58
|
+
"cols": 4
|
|
59
|
+
}
|
|
45
60
|
}
|
|
46
61
|
},
|
|
47
62
|
"required": [
|
|
@@ -5,7 +5,7 @@ import { fullFormats } from "ajv-formats/dist/formats.js";
|
|
|
5
5
|
"use strict";
|
|
6
6
|
export const validate = validate14;
|
|
7
7
|
export default validate14;
|
|
8
|
-
const schema16 = {"$id":"https://github.com/data-fair/catalog-csw/catalog-config","x-exports":["types","validate","schema"],"title":"CSWConfig","type":"object","additionalProperties":false,"properties":{"url":{"title":"CSW service URL","x-i18n-title":{"fr":"URL du service CSW"},"description":"URL of the CSW service to connect to (GeoNetwork, ...)","x-i18n-description":{"fr":"URL du service CSW auquel se connecter (GeoNetwork, ...)"},"type":"string","format":"uri","default":"https://geobretagne.fr/geonetwork/srv/fre/csw"}},"required":["url"]};
|
|
8
|
+
const schema16 = {"$id":"https://github.com/data-fair/catalog-csw/catalog-config","x-exports":["types","validate","schema"],"title":"CSWConfig","type":"object","additionalProperties":false,"properties":{"url":{"title":"CSW service URL","x-i18n-title":{"fr":"URL du service CSW"},"description":"URL of the CSW service to connect to (GeoNetwork, ...)","x-i18n-description":{"fr":"URL du service CSW auquel se connecter (GeoNetwork, ...)"},"type":"string","format":"uri","default":"https://geobretagne.fr/geonetwork/srv/fre/csw"},"download_info":{"type":"string","readOnly":true,"title":"Download and Format Selection Info","x-i18n-title":{"fr":"À savoir : Téléchargements et Formats"},"description":"A CSW catalog is primarily a data directory. It can happen that some documents do not contain a direct download link, or that these links no longer work (remote server unavailable). In these cases, the data cannot be retrieved.\n\n**Exploration by type:**\nTo make your search easier, documents are automatically organized by file type. We recommend selecting the 'dataset' folder if it is available. Once inside, you can use the search bar to filter the results by your desired format (e.g., csv, pdf, etc.).\n\n**Data retrieval:**\nWhen a document offers multiple links, the system automatically chooses the best option for you. It ensures the link is functional and prioritizes the most usable formats for the platform (GeoJSON first, then Shapefile, CSV, etc.). Even if the source does not clearly specify the format, the system analyzes the link behind the scenes to retrieve the data in its most exploitable form.","x-i18n-description":{"fr":"Un catalogue CSW est un annuaire de données. Il peut arriver que certains documents ne contiennent pas de lien de téléchargement direct, ou que ces liens ne fonctionnent plus (serveur distant indisponible). 
Dans ces cas, les données ne pourront pas être récupérées.\n\n**Exploration par type :**\nPour faciliter vos recherches, les documents sont automatiquement organisés par types de fichiers. Il est recommandé de sélectionner le dossier « dataset » s'il est disponible. Une fois à l'intérieur, vous pouvez utiliser la barre de recherche pour filtrer les résultats selon le format souhaité (par exemple : CSV, JSON, etc.).\n\n**Récupération de la donnée :**\nLorsqu'un document propose plusieurs liens, le système choisit automatiquement la meilleure option pour vous. Il s'assure que le lien est fonctionnel et privilégie les formats les plus faciles à utiliser sur la plateforme (en priorité le GeoJSON, puis le Shapefile, le CSV, etc.). Même si la source ne précise pas clairement le format, le système analyse le lien en arrière-plan pour récupérer la donnée sous sa forme la plus exploitable."},"layout":{"cols":4}}},"required":["url"]};
|
|
9
9
|
const formats0 = fullFormats.uri;
|
|
10
10
|
|
|
11
11
|
function validate14(data, {instancePath="", parentData, parentDataProperty, rootData=data}={}){
|
|
@@ -24,7 +24,7 @@ vErrors.push(err0);
|
|
|
24
24
|
errors++;
|
|
25
25
|
}
|
|
26
26
|
for(const key0 in data){
|
|
27
|
-
if(!(key0 === "url")){
|
|
27
|
+
if(!((key0 === "url") || (key0 === "download_info"))){
|
|
28
28
|
const err1 = {instancePath,schemaPath:"#/additionalProperties",keyword:"additionalProperties",params:{additionalProperty: key0},message:"must NOT have additional properties"};
|
|
29
29
|
if(vErrors === null){
|
|
30
30
|
vErrors = [err1];
|
|
@@ -60,9 +60,9 @@ vErrors.push(err3);
|
|
|
60
60
|
errors++;
|
|
61
61
|
}
|
|
62
62
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
const err4 = {instancePath,schemaPath:"#/type",keyword:"type",params:{type: "
|
|
63
|
+
if(data.download_info !== undefined){
|
|
64
|
+
if(typeof data.download_info !== "string"){
|
|
65
|
+
const err4 = {instancePath:instancePath+"/download_info",schemaPath:"#/properties/download_info/type",keyword:"type",params:{type: "string"},message:"must be string"};
|
|
66
66
|
if(vErrors === null){
|
|
67
67
|
vErrors = [err4];
|
|
68
68
|
}
|
|
@@ -71,6 +71,18 @@ vErrors.push(err4);
|
|
|
71
71
|
}
|
|
72
72
|
errors++;
|
|
73
73
|
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
else {
|
|
77
|
+
const err5 = {instancePath,schemaPath:"#/type",keyword:"type",params:{type: "object"},message:"must be object"};
|
|
78
|
+
if(vErrors === null){
|
|
79
|
+
vErrors = [err5];
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
vErrors.push(err5);
|
|
83
|
+
}
|
|
84
|
+
errors++;
|
|
85
|
+
}
|
|
74
86
|
validate14.errors = vErrors;
|
|
75
87
|
return errors === 0;
|
|
76
88
|
}
|
|
@@ -21,6 +21,21 @@
|
|
|
21
21
|
"type": "string",
|
|
22
22
|
"format": "uri",
|
|
23
23
|
"default": "https://geobretagne.fr/geonetwork/srv/fre/csw"
|
|
24
|
+
},
|
|
25
|
+
"download_info": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"readOnly": true,
|
|
28
|
+
"title": "Download and Format Selection Info",
|
|
29
|
+
"x-i18n-title": {
|
|
30
|
+
"fr": "À savoir : Téléchargements et Formats"
|
|
31
|
+
},
|
|
32
|
+
"description": "A CSW catalog is primarily a data directory. It can happen that some documents do not contain a direct download link, or that these links no longer work (remote server unavailable). In these cases, the data cannot be retrieved.\n\n**Exploration by type:**\nTo make your search easier, documents are automatically organized by file type. We recommend selecting the 'dataset' folder if it is available. Once inside, you can use the search bar to filter the results by your desired format (e.g., csv, pdf, etc.).\n\n**Data retrieval:**\nWhen a document offers multiple links, the system automatically chooses the best option for you. It ensures the link is functional and prioritizes the most usable formats for the platform (GeoJSON first, then Shapefile, CSV, etc.). Even if the source does not clearly specify the format, the system analyzes the link behind the scenes to retrieve the data in its most exploitable form.",
|
|
33
|
+
"x-i18n-description": {
|
|
34
|
+
"fr": "Un catalogue CSW est un annuaire de données. Il peut arriver que certains documents ne contiennent pas de lien de téléchargement direct, ou que ces liens ne fonctionnent plus (serveur distant indisponible). Dans ces cas, les données ne pourront pas être récupérées.\n\n**Exploration par type :**\nPour faciliter vos recherches, les documents sont automatiquement organisés par types de fichiers. Il est recommandé de sélectionner le dossier « dataset » s'il est disponible. Une fois à l'intérieur, vous pouvez utiliser la barre de recherche pour filtrer les résultats selon le format souhaité (par exemple : CSV, JSON, etc.).\n\n**Récupération de la donnée :**\nLorsqu'un document propose plusieurs liens, le système choisit automatiquement la meilleure option pour vous. Il s'assure que le lien est fonctionnel et privilégie les formats les plus faciles à utiliser sur la plateforme (en priorité le GeoJSON, puis le Shapefile, le CSV, etc.). Même si la source ne précise pas clairement le format, le système analyse le lien en arrière-plan pour récupérer la donnée sous sa forme la plus exploitable."
|
|
35
|
+
},
|
|
36
|
+
"layout": {
|
|
37
|
+
"cols": 4
|
|
38
|
+
}
|
|
24
39
|
}
|
|
25
40
|
},
|
|
26
41
|
"required": ["url"]
|