@data-fair/catalog-data-fair 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/download.ts CHANGED
@@ -4,6 +4,7 @@ import axios from '@data-fair/lib-node/axios.js'
4
4
  import * as fs from 'fs'
5
5
  import { join } from 'path'
6
6
  import { Transform } from 'stream'
7
+ import slugify from 'slugify'
7
8
 
8
9
  /**
9
10
  * Retrieves a resource by first fetching its metadata and then downloading the actual resource.
@@ -15,8 +16,8 @@ import { Transform } from 'stream'
15
16
  export const getResource = async (context: GetResourceContext<DataFairConfig>): ReturnType<CatalogPlugin['getResource']> => {
16
17
  context.log.step('Import de la ressource')
17
18
 
18
- const resource = await getMetaData(context)
19
- resource.filePath = await downloadResource(context, resource)
19
+ const { resource, file } = await getMetaData(context)
20
+ resource.filePath = await downloadResource(context, file, resource)
20
21
 
21
22
  return resource
22
23
  }
@@ -27,11 +28,12 @@ export const getResource = async (context: GetResourceContext<DataFairConfig>):
27
28
  * @param resourceId the dataset Id to fetch fields from
28
29
  * @returns the Resource corresponding to the id by this configuration
29
30
  */
30
- const getMetaData = async ({ catalogConfig, resourceId, log }: GetResourceContext<DataFairConfig>): Promise<Resource> => {
31
+ const getMetaData = async ({ catalogConfig, resourceId, log, secrets }: GetResourceContext<DataFairConfig>): Promise<{ resource: Resource, file: boolean }> => {
31
32
  let dataset: DataFairDataset
32
33
  try {
33
34
  const url = `${catalogConfig.url}/data-fair/api/v1/datasets/${resourceId}`
34
- const res = (await axios.get(url))
35
+ const config = secrets.apiKey ? { headers: { 'x-apiKey': secrets.apiKey } } : undefined
36
+ const res = (await axios.get(url, config))
35
37
  if (res.status !== 200) {
36
38
  throw new Error(`HTTP error : ${res.status}, ${res.data}`)
37
39
  }
@@ -42,6 +44,17 @@ const getMetaData = async ({ catalogConfig, resourceId, log }: GetResourceContex
42
44
  throw new Error(`Erreur lors de la récuperation de la resource DataFair. ${e instanceof Error ? e.message : e}`)
43
45
  }
44
46
 
47
+ dataset.schema = (dataset.schema ?? []).map((field) => {
48
+ if (field['x-extension']) {
49
+ return {
50
+ ...field,
51
+ key: slugify.default(field.key.replace(/^_/, ''), { lower: true, strict: true, replacement: '_' }), // Ensure no leading underscore
52
+ 'x-extension': undefined, // Remove x-extension property if it exists
53
+ }
54
+ }
55
+ return field
56
+ })
57
+
45
58
  const resource: Resource = {
46
59
  id: resourceId,
47
60
  title: dataset.title,
@@ -51,7 +64,7 @@ const getMetaData = async ({ catalogConfig, resourceId, log }: GetResourceContex
51
64
  frequency: dataset.frequency,
52
65
  image: dataset.image,
53
66
  keywords: dataset.keywords,
54
- size: dataset.file?.size,
67
+ size: dataset.file?.size ?? dataset.storage?.size ?? dataset.originalFile?.size,
55
68
  schema: dataset.schema,
56
69
  filePath: '',
57
70
  }
@@ -62,7 +75,8 @@ const getMetaData = async ({ catalogConfig, resourceId, log }: GetResourceContex
62
75
  href: dataset.license.href ?? '',
63
76
  }
64
77
  }
65
- return resource
78
+
79
+ return { resource, file: !!dataset.file }
66
80
  }
67
81
 
68
82
  /**
@@ -72,12 +86,14 @@ const getMetaData = async ({ catalogConfig, resourceId, log }: GetResourceContex
72
86
  * @param res - the metadata about the resource.
73
87
  * @returns A promise resolving to the file path of the downloaded CSV.
74
88
  */
75
- const downloadResource = async (context: GetResourceContext<DataFairConfig>, res: Resource): Promise<string> => {
89
+ const downloadResource = async (context: GetResourceContext<DataFairConfig>, file: boolean, res: Resource): Promise<string> => {
76
90
  const filePath = join(context.tmpDir, `${context.resourceId}.csv`)
77
91
  try {
78
- if (res.size && res.size > 0 && context.importConfig.fields?.length === 0 && context.importConfig.filters?.length === 0) {
92
+ if (file && !context.importConfig.fields?.length && !context.importConfig.filters?.length) {
93
+ await context.log.task('downloading', 'Téléchargement en cours... (taille approximative)', res.size || NaN)
79
94
  await downloadResourceFile(filePath, context)
80
95
  } else {
96
+ await context.log.task('downloading', 'Téléchargement en cours...', NaN)
81
97
  await downloadResourceLines(filePath, context)
82
98
  }
83
99
  return filePath
@@ -97,29 +113,47 @@ const downloadResource = async (context: GetResourceContext<DataFairConfig>, res
97
113
  * @returns A promise that resolves when the file is successfully downloaded and saved.
98
114
  * @throws If there is an error writing the file or fetching the dataset.
99
115
  */
100
- const downloadResourceFile = async (filePath: string, { catalogConfig, resourceId, log }: GetResourceContext<DataFairConfig>): Promise<void> => {
116
+ const downloadResourceFile = async (filePath: string, { catalogConfig, resourceId, log, secrets }: GetResourceContext<DataFairConfig>): Promise<void> => {
101
117
  const url = `${catalogConfig.url}/data-fair/api/v1/datasets/${resourceId}/full`
102
- log.info('Import des données de la ressource', url)
118
+ const headers = secrets.apiKey ? { 'x-apiKey': secrets.apiKey } : undefined
103
119
 
104
- const fileStream = fs.createWriteStream(filePath)
105
-
106
- const response = await axios.get(url, { responseType: 'stream' })
120
+ const response = await axios.get(url, { responseType: 'stream', headers })
107
121
 
108
122
  if (response.status !== 200) {
109
123
  throw new Error(`Error while fetching data: HTTP ${response.statusText}`)
110
124
  }
111
125
 
126
+ let downloaded = 0
127
+ let logPromise: Promise<void> | null = null
128
+
112
129
  return new Promise<void>((resolve, reject) => {
130
+ const fileStream = fs.createWriteStream(filePath, { encoding: 'binary' }) // Ensure binary encoding
131
+
132
+ response.data.on('data', (chunk: Buffer) => {
133
+ downloaded += chunk.length
134
+ if (!logPromise) {
135
+ logPromise = log.progress('downloading', downloaded)
136
+ .catch(err => console.warn('Progress logging failed:', err))
137
+ .finally(() => { logPromise = null })
138
+ }
139
+ })
140
+
113
141
  response.data.pipe(fileStream)
142
+
114
143
  fileStream.on('finish', () => {
144
+ fileStream.close()
115
145
  resolve()
116
146
  })
147
+
117
148
  response.data.on('error', (err: any) => {
118
- fs.unlink(filePath, () => { }) // Delete the file in case of error
149
+ fileStream.destroy()
150
+ fs.unlink(filePath, () => { })
119
151
  reject(err)
120
152
  })
153
+
121
154
  fileStream.on('error', (err) => {
122
- fs.unlink(filePath, () => { }) // Delete the file in case of error
155
+ response.data.destroy()
156
+ fs.unlink(filePath, () => { })
123
157
  reject(err)
124
158
  })
125
159
  })
@@ -135,13 +169,15 @@ const downloadResourceFile = async (filePath: string, { catalogConfig, resourceI
135
169
  * @returns A promise that resolves when the file is successfully downloaded and saved.
136
170
  * @throws If there is an error writing the file or fetching the dataset.
137
171
  */
138
- const downloadResourceLines = async (destFile: string, { catalogConfig, resourceId, importConfig, log }: GetResourceContext<DataFairConfig> & { importConfig: ImportConfig }) => {
139
- let url: string | null = `${catalogConfig.url}/data-fair/api/v1/datasets/${resourceId}/lines?format=csv&size=3000`
172
+ const downloadResourceLines = async (destFile: string, { catalogConfig, resourceId, importConfig, secrets, log }: GetResourceContext<DataFairConfig> & { importConfig: ImportConfig }) => {
173
+ let url: string | null = `${catalogConfig.url}/data-fair/api/v1/datasets/${resourceId}/lines?format=csv&size=5000`
140
174
 
141
175
  if (importConfig.fields) {
142
176
  url += '&select=' + importConfig.fields.map(field => field.key).join(',')
143
177
  }
144
178
 
179
+ const headers = secrets.apiKey ? { 'x-apiKey': secrets.apiKey } : undefined
180
+
145
181
  if (importConfig.filters) {
146
182
  importConfig.filters.forEach((filter) => {
147
183
  switch (filter.type) {
@@ -160,13 +196,14 @@ const downloadResourceLines = async (destFile: string, { catalogConfig, resource
160
196
  })
161
197
  }
162
198
 
163
- log.info('Import des données de la ressource', url)
199
+ let downloaded = 0
200
+ let pendingLogPromise: Promise<void> | null = null
201
+
164
202
  const writer = fs.createWriteStream(destFile)
165
203
  let isFirstChunk = true
166
204
 
167
205
  while (url) {
168
- console.log(url)
169
- const response = await axios.get(url, { responseType: 'stream' })
206
+ const response = await axios.get(url, { responseType: 'stream', headers })
170
207
  if (response.status !== 200) {
171
208
  throw new Error(`Error while fetching data: HTTP ${response.statusText}`)
172
209
  }
@@ -189,6 +226,18 @@ const downloadResourceLines = async (destFile: string, { catalogConfig, resource
189
226
  }
190
227
  }))
191
228
  }
229
+
230
+ stream.on('data', (chunk: Buffer) => {
231
+ downloaded += chunk.length
232
+
233
+ // Only start a new log promise if there isn't one already running
234
+ if (!pendingLogPromise) {
235
+ pendingLogPromise = log.progress('downloading', downloaded)
236
+ .catch(err => console.warn('Progress logging failed:', err))
237
+ .finally(() => { pendingLogPromise = null })
238
+ }
239
+ })
240
+
192
241
  stream.pipe(writer, { end: false })
193
242
  stream.on('end', () => {
194
243
  const linkHeader = response.headers.link
package/lib/imports.ts CHANGED
@@ -20,6 +20,7 @@ const prepareCatalog = (dataFairDatasets: DataFairDataset[]): ResourceList => {
20
20
  format: 'csv',
21
21
  size: dataFairDataset.file?.size ?? dataFairDataset.storage?.size ?? dataFairDataset.originalFile?.size,
22
22
  type: 'resource',
23
+ origin: dataFairDataset.page
23
24
  } as ResourceList[number])
24
25
  }
25
26
  return catalog
@@ -38,8 +39,9 @@ export const listResources = async (config: ListResourcesContext<DataFairConfig,
38
39
 
39
40
  let data: DataFairCatalog
40
41
  const url = `${config.catalogConfig.url}/data-fair/api/v1/catalog/datasets`
42
+ const headers = config.secrets.apiKey ? { 'x-apiKey': config.secrets.apiKey } : undefined
41
43
  try {
42
- const res = (await axios.get(url, { params: dataFairParams }))
44
+ const res = (await axios.get(url, { params: dataFairParams, headers }))
43
45
  if (res.status !== 200) {
44
46
  throw new Error(`HTTP error : ${res.status}, ${res.data}`)
45
47
  }
package/lib/prepare.ts CHANGED
@@ -4,15 +4,27 @@ import type { DataFairConfig } from '#types'
4
4
  import axios from '@data-fair/lib-node/axios.js'
5
5
 
6
6
  export default async ({ catalogConfig, capabilities, secrets }: PrepareContext<DataFairConfig, DataFairCapabilities>) => {
7
- // To remove when catalog/datasets allows q parameters
8
- capabilities = capabilities.filter(c => c !== 'search')
7
+ // set the apiKey in the secrets field if it exists
8
+ const apiKey = catalogConfig.apiKey
9
+ if (apiKey && apiKey !== '*************************') {
10
+ secrets.apiKey = apiKey
11
+ catalogConfig.apiKey = '*************************'
12
+ } else if (secrets?.apiKey && (!apiKey || apiKey === '')) {
13
+ delete secrets.apiKey
14
+ } else {
15
+ // The secret is already set, do nothing
16
+ }
9
17
 
10
18
  // test the url
11
19
  try {
12
- await axios.get(catalogConfig.url + '/data-fair/api/v1/catalog/datasets?size=1&select=id')
20
+ if (!catalogConfig.url) {
21
+ throw new Error('URL du catalogue non définie')
22
+ }
23
+ const config = secrets.apiKey ? { headers: { 'x-apiKey': secrets.apiKey } } : undefined
24
+ await axios.get(catalogConfig.url + '/data-fair/api/v1/catalog/datasets?size=1&select=id', config)
13
25
  } catch (e) {
14
26
  console.error('Erreur URL pendant la configuration : ', e instanceof Error ? e.message : e)
15
- throw new Error('Configuration invalide, vérifiez l\'URL')
27
+ throw new Error(`Configuration invalide, veuillez vérifier lURL du catalogue et la clé API si nécessaire (${e instanceof Error ? e.message : e})`)
16
28
  }
17
29
 
18
30
  return {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@data-fair/catalog-data-fair",
3
3
  "description": "A simple Data Fair plugin for the Data Fair catalogs service.",
4
- "version": "0.1.0",
4
+ "version": "0.2.0",
5
5
  "main": "index.ts",
6
6
  "type": "module",
7
7
  "scripts": {
@@ -31,7 +31,8 @@
31
31
  "dependencies": {
32
32
  "@data-fair/lib-node": "^2.8.2",
33
33
  "@data-fair/lib-utils": "^1.6.0",
34
- "prom-client": "^15.1.3"
34
+ "prom-client": "^15.1.3",
35
+ "slugify": "^1.6.6"
35
36
  },
36
37
  "devDependencies": {
37
38
  "@commitlint/cli": "^19.8.0",
@@ -6,9 +6,14 @@ export const schemaExports: string[]
6
6
  * The url of the catalog
7
7
  */
8
8
  export type URL = string;
9
+ /**
10
+ * The Data Fair API key to access the catalog. You can create one from the 'Settings' tab in Data Fair, under the 'API Keys' section.
11
+ */
12
+ export type APIKeyOptional = string;
9
13
 
10
14
  export type DataFairConfig = {
11
15
  url: URL;
16
+ apiKey?: APIKeyOptional;
12
17
  }
13
18
 
14
19
 
@@ -38,7 +38,7 @@ export const schema = {
38
38
  "title": "URL",
39
39
  "description": "The url of the catalog",
40
40
  "x-i18n-description": {
41
- "fr": "L'URL du site où le catalogue est publié. *L'URL de l'API utilisée sera* *https*[]()*://example.com***/data-fair/api/v1**"
41
+ "fr": "L'URL du portail où le catalogue est publié. Par exemple: `https://opendata.koumoul.com`."
42
42
  },
43
43
  "pattern": "^https?://.*[^/]$",
44
44
  "errorMessage": "The URL must start with http:// or https:// and must not end with `/`.",
@@ -46,8 +46,25 @@ export const schema = {
46
46
  "fr": "L'URL doit commencer par http:// ou https:// et ne pas se terminer par `/`."
47
47
  },
48
48
  "examples": [
49
- "https://example.com"
49
+ "https://opendata.koumoul.com"
50
50
  ]
51
+ },
52
+ "apiKey": {
53
+ "type": "string",
54
+ "title": "API Key (Optional)",
55
+ "x-i18n-title": {
56
+ "fr": "Clé API (Optionnelle)"
57
+ },
58
+ "description": "The Data Fair API key to access the catalog. You can create one from the 'Settings' tab in Data Fair, under the 'API Keys' section.",
59
+ "x-i18n-description": {
60
+ "fr": "La clé API Data Fair pour accéder au catalogue. Vous pouvez en créer depuis l'onglet 'Paramètres' de Data Fair, dans la section 'Clés d'API'."
61
+ },
62
+ "layout": {
63
+ "props": {
64
+ "type": "password",
65
+ "autocomplete": "new-password"
66
+ }
67
+ }
51
68
  }
52
69
  }
53
70
  }
@@ -4,7 +4,7 @@
4
4
  "use strict";
5
5
  export const validate = validate14;
6
6
  export default validate14;
7
- const schema16 = {"$id":"https://github.com/data-fair/catalog-data-fair/catalog-config","x-exports":["types","validate","schema"],"title":"DataFairConfig","type":"object","additionalProperties":false,"required":["url"],"properties":{"url":{"type":"string","title":"URL","description":"The url of the catalog","x-i18n-description":{"fr":"L'URL du site où le catalogue est publié. *L'URL de l'API utilisée sera* *https*[]()*://example.com***/data-fair/api/v1**"},"pattern":"^https?://.*[^/]$","errorMessage":"The URL must start with http:// or https:// and must not end with `/`.","x-i18n-errorMessage":{"fr":"L'URL doit commencer par http:// ou https:// et ne pas se terminer par `/`."},"examples":["https://example.com"]}}};
7
+ const schema16 = {"$id":"https://github.com/data-fair/catalog-data-fair/catalog-config","x-exports":["types","validate","schema"],"title":"DataFairConfig","type":"object","additionalProperties":false,"required":["url"],"properties":{"url":{"type":"string","title":"URL","description":"The url of the catalog","x-i18n-description":{"fr":"L'URL du portail où le catalogue est publié. Par exemple: `https://opendata.koumoul.com`."},"pattern":"^https?://.*[^/]$","errorMessage":"The URL must start with http:// or https:// and must not end with `/`.","x-i18n-errorMessage":{"fr":"L'URL doit commencer par http:// ou https:// et ne pas se terminer par `/`."},"examples":["https://opendata.koumoul.com"]},"apiKey":{"type":"string","title":"API Key (Optional)","x-i18n-title":{"fr":"Clé API (Optionnelle)"},"description":"The Data Fair API key to access the catalog. You can create one from the 'Settings' tab in Data Fair, under the 'API Keys' section.","x-i18n-description":{"fr":"La clé API Data Fair pour accéder au catalogue. Vous pouvez en créer depuis l'onglet 'Paramètres' de Data Fair, dans la section 'Clés d'API'."},"layout":{"props":{"type":"password","autocomplete":"new-password"}}}}};
8
8
  const pattern0 = new RegExp("^https?://.*[^/]$", "u");
9
9
 
10
10
  function validate14(data, {instancePath="", parentData, parentDataProperty, rootData=data}={}){
@@ -23,7 +23,7 @@ vErrors.push(err0);
23
23
  errors++;
24
24
  }
25
25
  for(const key0 in data){
26
- if(!(key0 === "url")){
26
+ if(!((key0 === "url") || (key0 === "apiKey"))){
27
27
  const err1 = {instancePath,schemaPath:"#/additionalProperties",keyword:"additionalProperties",params:{additionalProperty: key0},message:"must NOT have additional properties"};
28
28
  if(vErrors === null){
29
29
  vErrors = [err1];
@@ -86,9 +86,9 @@ vErrors = emErrs1;
86
86
  errors = emErrs1.length;
87
87
  }
88
88
  }
89
- }
90
- else {
91
- const err7 = {instancePath,schemaPath:"#/type",keyword:"type",params:{type: "object"},message:"must be object"};
89
+ if(data.apiKey !== undefined){
90
+ if(typeof data.apiKey !== "string"){
91
+ const err7 = {instancePath:instancePath+"/apiKey",schemaPath:"#/properties/apiKey/type",keyword:"type",params:{type: "string"},message:"must be string"};
92
92
  if(vErrors === null){
93
93
  vErrors = [err7];
94
94
  }
@@ -97,6 +97,18 @@ vErrors.push(err7);
97
97
  }
98
98
  errors++;
99
99
  }
100
+ }
101
+ }
102
+ else {
103
+ const err8 = {instancePath,schemaPath:"#/type",keyword:"type",params:{type: "object"},message:"must be object"};
104
+ if(vErrors === null){
105
+ vErrors = [err8];
106
+ }
107
+ else {
108
+ vErrors.push(err8);
109
+ }
110
+ errors++;
111
+ }
100
112
  validate14.errors = vErrors;
101
113
  return errors === 0;
102
114
  }
@@ -17,14 +17,33 @@
17
17
  "title": "URL",
18
18
  "description": "The url of the catalog",
19
19
  "x-i18n-description": {
20
- "fr": "L'URL du site où le catalogue est publié. *L'URL de l'API utilisée sera* *https*[]()*://example.com***/data-fair/api/v1**"
20
+ "fr": "L'URL du portail où le catalogue est publié. Par exemple: `https://opendata.koumoul.com`."
21
21
  },
22
22
  "pattern": "^https?://.*[^/]$",
23
23
  "errorMessage": "The URL must start with http:// or https:// and must not end with `/`.",
24
24
  "x-i18n-errorMessage": {
25
25
  "fr": "L'URL doit commencer par http:// ou https:// et ne pas se terminer par `/`."
26
26
  },
27
- "examples": ["https://example.com"]
27
+ "examples": [
28
+ "https://opendata.koumoul.com"
29
+ ]
30
+ },
31
+ "apiKey": {
32
+ "type": "string",
33
+ "title": "API Key (Optional)",
34
+ "x-i18n-title": {
35
+ "fr": "Clé API (Optionnelle)"
36
+ },
37
+ "description": "The Data Fair API key to access the catalog. You can create one from the 'Settings' tab in Data Fair, under the 'API Keys' section.",
38
+ "x-i18n-description": {
39
+ "fr": "La clé API Data Fair pour accéder au catalogue. Vous pouvez en créer depuis l'onglet 'Paramètres' de Data Fair, dans la section 'Clés d'API'."
40
+ },
41
+ "layout": {
42
+ "props": {
43
+ "type": "password",
44
+ "autocomplete": "new-password"
45
+ }
46
+ }
28
47
  }
29
48
  }
30
- }
49
+ }
@@ -1,5 +1,5 @@
1
1
  {
2
- "$id": "https://github.com/data-fair/catalog-data-fair/catlog-schemas",
2
+ "$id": "https://github.com/data-fair/catalog-data-fair/catalog-schemas",
3
3
  "x-exports": [
4
4
  "types"
5
5
  ],