@opentermsarchive/engine 9.2.3 → 10.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ #! /usr/bin/env node
2
+ import './env.js';
3
+
4
+ import path from 'path';
5
+ import { fileURLToPath, pathToFileURL } from 'url';
6
+
7
+ import { program } from 'commander';
8
+
9
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
+
11
+ const { applyTechnicalUpgrades } = await import(pathToFileURL(path.resolve(__dirname, '../src/index.js'))); // load asynchronously to ensure env.js is loaded before
12
+
13
+ program
14
+ .name('ota apply-technical-upgrades')
15
+ .description('Apply technical upgrades by generating new versions from the latest snapshots using updated declarations, engine logic, or dependencies, and by retrieving any missing snapshots for newly added source documents')
16
+ .option('-s, --services [serviceId...]', 'service IDs to apply technical upgrades to')
17
+ .option('-t, --types [termsType...]', 'terms types to apply technical upgrades to');
18
+
19
+ applyTechnicalUpgrades(program.parse(process.argv).opts());
@@ -11,9 +11,9 @@ import logger from '../src/logger/index.js';
11
11
 
12
12
  program
13
13
  .name('ota dataset')
14
- .description('Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')
14
+ .description('Export the versions dataset into a ZIP file and optionally publish it to GitHub releases, GitLab releases, or data.gouv.fr')
15
15
  .option('-f, --file <filename>', 'file name of the generated dataset')
16
- .option('-p, --publish', 'publish dataset to GitHub releases on versions repository. Mandatory authentication to GitHub is provided through the `OTA_ENGINE_GITHUB_TOKEN` environment variable')
16
+ .option('-p, --publish', 'publish dataset. Supports GitHub releases (OTA_ENGINE_GITHUB_TOKEN), GitLab releases (OTA_ENGINE_GITLAB_TOKEN), or data.gouv.fr (OTA_ENGINE_DATAGOUV_API_KEY + config)')
17
17
  .option('-r, --remove-local-copy', 'remove local copy of dataset after publishing. Works only in combination with --publish option')
18
18
  .option('--schedule', 'schedule automatic dataset generation');
19
19
 
package/bin/ota-track.js CHANGED
@@ -15,7 +15,6 @@ program
15
15
  .description('Retrieve declared documents, record snapshots, extract versions and publish the resulting records')
16
16
  .option('-s, --services [serviceId...]', 'service IDs of services to track')
17
17
  .option('-t, --types [termsType...]', 'terms types to track')
18
- .option('-e, --extract-only', 'extract versions from existing snapshots with latest declarations and engine, without recording new snapshots')
19
18
  .option('--schedule', 'track automatically at a regular interval');
20
19
 
21
20
  track(program.parse(process.argv).opts());
package/bin/ota.js CHANGED
@@ -11,6 +11,7 @@ program
11
11
  .description(description)
12
12
  .version(version)
13
13
  .command('track', 'Track the current terms of services according to provided declarations')
14
+ .command('apply-technical-upgrades', 'Apply technical upgrades by generating new versions from the latest snapshots using updated declarations, engine logic, or dependencies')
14
15
  .command('validate', 'Run a series of tests to check the validity of terms declarations')
15
16
  .command('lint', 'Check format and stylistic errors in declarations and auto fix them')
16
17
  .command('dataset', 'Export the versions dataset into a ZIP file and optionally publish it to GitHub releases')
@@ -56,7 +56,7 @@
56
56
  }
57
57
  },
58
58
  "dataset": {
59
- "title": "sandbox",
59
+ "title": "Sandbox collection dataset",
60
60
  "versionsRepositoryURL": "https://github.com/OpenTermsArchive/sandbox-declarations",
61
61
  "publishingSchedule": "30 8 * * MON"
62
62
  }
@@ -0,0 +1,60 @@
1
+ {
2
+ "@opentermsarchive/engine": {
3
+ "collectionPath": "../collections/pga-declarations",
4
+ "recorder": {
5
+ "versions": {
6
+ "storage": {
7
+ "git": {
8
+ "snapshotIdentiferTemplate": "https://github.com/OpenTermsArchive/vlopses-ie-snapshots/commit/%SNAPSHOT_ID",
9
+ "repository": "git@github.com:OpenTermsArchive/vlopses-ie-versions.git",
10
+ "path": "./data/versions"
11
+ }
12
+ }
13
+ },
14
+ "snapshots": {
15
+ "storage": {
16
+ "git": {
17
+ "repository": "git@github.com:OpenTermsArchive/vlopses-ie-snapshots.git",
18
+ "path": "./data/snapshots"
19
+ }
20
+ }
21
+ }
22
+ },
23
+ "fetcher": {
24
+ "language": "en"
25
+ },
26
+ "notifier": {
27
+ "sendInBlue": {
28
+ "updatesListId": 0,
29
+ "updateTemplateId": 0
30
+ }
31
+ },
32
+ "logger": {
33
+ "sendMailOnError": {
34
+ "to": "admin@opentermsarchive.org",
35
+ "from": "noreply@opentermsarchive.org",
36
+ "sendWarnings": false
37
+ }
38
+ },
39
+ "reporter": {
40
+ "type": "github",
41
+ "repositories": {
42
+ "declarations": "OpenTermsArchive/vlopses-ie-declarations",
43
+ "versions": "OpenTermsArchive/vlopses-ie-versions",
44
+ "snapshots": "OpenTermsArchive/vlopses-ie-snapshots"
45
+ }
46
+ },
47
+ "collection-api": {
48
+ "host": "127.0.0.1",
49
+ "port": 3003,
50
+ "basePath": "/collection-api"
51
+ },
52
+ "dataset": {
53
+ "datagouv": {
54
+ "organizationIdOrSlug": "otatest",
55
+ "frequency": "weekly",
56
+ "useDemo": true
57
+ }
58
+ }
59
+ }
60
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opentermsarchive/engine",
3
- "version": "9.2.3",
3
+ "version": "10.1.1",
4
4
  "description": "Tracks and makes visible changes to the terms of online services",
5
5
  "homepage": "https://opentermsarchive.org",
6
6
  "bugs": {
@@ -14,7 +14,7 @@ export function title({ releaseDate }) {
14
14
 
15
15
  const title = config.get('@opentermsarchive/engine.dataset.title');
16
16
 
17
- return `${title} — ${releaseDate} dataset`;
17
+ return `${title} — ${releaseDate}`;
18
18
  }
19
19
 
20
20
  export function body({ servicesCount, firstVersionDate, lastVersionDate }) {
@@ -1,4 +1,4 @@
1
- # Open Terms Archive — sandbox — January 1, 2022 dataset
1
+ # Open Terms Archive — sandbox — January 1, 2022
2
2
 
3
3
  This dataset consolidates the contractual documents of 2 service providers, in all their versions that were accessible online between January 1, 2021 and January 6, 2022.
4
4
 
@@ -9,7 +9,7 @@ import publishRelease from './publish/index.js';
9
9
 
10
10
  export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }) {
11
11
  const releaseDate = new Date();
12
- const archiveName = fileName || `dataset-${config.get('@opentermsarchive/engine.dataset.title')}-${releaseDate.toISOString().replace(/T.*/, '')}`;
12
+ const archiveName = fileName || `${config.get('@opentermsarchive/engine.dataset.title').toLowerCase().replace(/[^a-zA-Z0-9.\-_]/g, '-')}-${releaseDate.toISOString().replace(/T.*/, '')}`;
13
13
  const archivePath = `${path.basename(archiveName, '.zip')}.zip`; // allow to pass filename or filename.zip as the archive name and have filename.zip as the result name
14
14
 
15
15
  logger.info('Start exporting dataset…');
@@ -24,13 +24,18 @@ export async function release({ shouldPublish, shouldRemoveLocalCopy, fileName }
24
24
 
25
25
  logger.info('Start publishing dataset…');
26
26
 
27
- const releaseUrl = await publishRelease({
27
+ const results = await publishRelease({
28
28
  archivePath,
29
29
  releaseDate,
30
30
  stats,
31
31
  });
32
32
 
33
- logger.info(`Dataset published to ${releaseUrl}`);
33
+ if (results.length > 0) {
34
+ logger.info('Dataset published to following platforms:');
35
+ results.forEach(result => {
36
+ logger.info(` - ${result.platform}: ${result.url}`);
37
+ });
38
+ }
34
39
 
35
40
  if (!shouldRemoveLocalCopy) {
36
41
  return;
@@ -8,13 +8,35 @@ const { combine, timestamp, printf, colorize } = winston.format;
8
8
  logger.format = combine(
9
9
  colorize(),
10
10
  timestamp({ format: 'YYYY-MM-DDTHH:mm:ssZ' }),
11
- printf(({ level, message, counter, hash, timestamp }) => {
12
- const prefix = counter && hash ? `${counter.toString().padEnd(6)} ${hash.padEnd(40)}` : '';
11
+ printf(({ level, message, counter, hash, timestamp, module }) => {
12
+ let prefix = counter && hash ? `${counter.toString().padEnd(6)} ${hash.padEnd(40)}` : '';
13
13
 
14
14
  const timestampPrefix = config.get('@opentermsarchive/engine.logger.timestampPrefix') ? `${timestamp} ` : '';
15
15
 
16
- return `${timestampPrefix}${level.padEnd(15)} ${prefix.padEnd(50)} ${message}`;
16
+ prefix = module ? `${module} ${prefix}` : prefix;
17
+
18
+ const levelStr = level.padEnd(15);
19
+ let coloredLevel = levelStr;
20
+ let coloredMessage = message;
21
+
22
+ if (level.includes('warn')) {
23
+ coloredLevel = `\x1b[33m${levelStr}\x1b[0m`;
24
+ coloredMessage = `\x1b[33m${message}\x1b[0m`;
25
+ } else if (level.includes('error')) {
26
+ coloredLevel = `\x1b[31m${levelStr}\x1b[0m`;
27
+ coloredMessage = `\x1b[31m${message}\x1b[0m`;
28
+ }
29
+
30
+ return `${timestampPrefix} ${coloredLevel} ${prefix.padEnd(50)} ${coloredMessage}`;
17
31
  }),
18
32
  );
19
33
 
34
+ export function createModuleLogger(moduleName) {
35
+ return {
36
+ info: message => logger.info(message, { module: moduleName }),
37
+ warn: message => logger.warn(message, { module: moduleName }),
38
+ error: message => logger.error(message, { module: moduleName }),
39
+ };
40
+ }
41
+
20
42
  export default logger;
@@ -0,0 +1,234 @@
1
+ import fsApi from 'fs';
2
+ import path from 'path';
3
+
4
+ import FormData from 'form-data';
5
+ import nodeFetch from 'node-fetch';
6
+
7
+ import { createModuleLogger } from '../../logger/index.js';
8
+
9
+ const logger = createModuleLogger('datagouv');
10
+
11
+ const DATASET_LICENSE = 'odc-odbl';
12
+ const DEFAULT_RESOURCE_DESCRIPTION = 'See README.md inside the archive for dataset structure and usage information.';
13
+
14
+ const routes = {
15
+ dataset: (apiBaseUrl, datasetId) => `${apiBaseUrl}/datasets/${datasetId}/`,
16
+ datasets: apiBaseUrl => `${apiBaseUrl}/datasets/`,
17
+ datasetUpload: (apiBaseUrl, datasetId) => `${apiBaseUrl}/datasets/${datasetId}/upload/`,
18
+ resource: (apiBaseUrl, datasetId, resourceId) => `${apiBaseUrl}/datasets/${datasetId}/resources/${resourceId}/`,
19
+ resourceUpload: (apiBaseUrl, datasetId, resourceId) => `${apiBaseUrl}/datasets/${datasetId}/resources/${resourceId}/upload/`,
20
+ organization: (apiBaseUrl, organizationIdOrSlug) => `${apiBaseUrl}/organizations/${organizationIdOrSlug}/`,
21
+ organizationDatasets: (apiBaseUrl, organizationId) => `${apiBaseUrl}/organizations/${organizationId}/datasets/?page_size=100`,
22
+ };
23
+
24
+ export async function getOrganization({ apiBaseUrl, headers, organizationIdOrSlug }) {
25
+ logger.info(`Fetching organization: ${organizationIdOrSlug}…`);
26
+
27
+ const orgResponse = await nodeFetch(routes.organization(apiBaseUrl, organizationIdOrSlug), { headers });
28
+
29
+ if (!orgResponse.ok) {
30
+ const errorText = await orgResponse.text();
31
+
32
+ throw new Error(`Failed to retrieve organization: ${orgResponse.status} ${orgResponse.statusText} - ${errorText}`);
33
+ }
34
+
35
+ const orgData = await orgResponse.json();
36
+
37
+ logger.info(`Found organization: ${orgData.name} (ID: ${orgData.id})`);
38
+
39
+ return orgData;
40
+ }
41
+
42
+ export async function getDataset({ apiBaseUrl, headers, datasetId }) {
43
+ const datasetResponse = await nodeFetch(routes.dataset(apiBaseUrl, datasetId), { headers });
44
+
45
+ if (!datasetResponse.ok) {
46
+ const errorText = await datasetResponse.text();
47
+ const error = new Error(`Failed to retrieve dataset: ${datasetResponse.status} ${datasetResponse.statusText} - ${errorText}`);
48
+
49
+ error.statusCode = datasetResponse.status;
50
+ throw error;
51
+ }
52
+
53
+ const datasetData = await datasetResponse.json();
54
+
55
+ return datasetData;
56
+ }
57
+
58
+ export async function findDatasetByTitle({ apiBaseUrl, headers, organizationId, title }) {
59
+ logger.info(`Searching for dataset with title "${title}" in organization…`);
60
+
61
+ const searchResponse = await nodeFetch(routes.organizationDatasets(apiBaseUrl, organizationId), { headers });
62
+
63
+ if (!searchResponse.ok) {
64
+ const errorText = await searchResponse.text();
65
+
66
+ throw new Error(`Failed to search for datasets: ${searchResponse.status} ${searchResponse.statusText} - ${errorText}`);
67
+ }
68
+
69
+ const searchData = await searchResponse.json();
70
+
71
+ const dataset = searchData.data.find(ds => ds.title === title);
72
+
73
+ if (dataset) {
74
+ logger.info(`Found existing dataset: ${dataset.title} (ID: ${dataset.id})`);
75
+
76
+ return dataset;
77
+ }
78
+
79
+ logger.info('No existing dataset found with this title');
80
+
81
+ return null;
82
+ }
83
+
84
+ export async function createDataset({ apiBaseUrl, headers, organizationId, title, description, license, frequency }) {
85
+ logger.info(`Creating new dataset: ${title}…`);
86
+
87
+ const createResponse = await nodeFetch(routes.datasets(apiBaseUrl), {
88
+ method: 'POST',
89
+ headers: {
90
+ ...headers,
91
+ 'Content-Type': 'application/json',
92
+ },
93
+ body: JSON.stringify({
94
+ title,
95
+ description,
96
+ organization: organizationId,
97
+ license,
98
+ frequency,
99
+ }),
100
+ });
101
+
102
+ if (!createResponse.ok) {
103
+ const errorText = await createResponse.text();
104
+
105
+ throw new Error(`Failed to create dataset: ${createResponse.status} ${createResponse.statusText} - ${errorText}`);
106
+ }
107
+
108
+ const dataset = await createResponse.json();
109
+
110
+ logger.info(`Dataset created successfully: ${dataset.title} (ID: ${dataset.id})`);
111
+
112
+ return dataset;
113
+ }
114
+
115
+ export async function updateDatasetMetadata({ apiBaseUrl, headers, datasetId, title, description, stats, frequency }) {
116
+ const updatePayload = {
117
+ title,
118
+ description,
119
+ license: DATASET_LICENSE,
120
+ frequency,
121
+ };
122
+
123
+ if (stats?.firstVersionDate && stats?.lastVersionDate) {
124
+ updatePayload.temporal_coverage = {
125
+ start: stats.firstVersionDate.toISOString(),
126
+ end: stats.lastVersionDate.toISOString(),
127
+ };
128
+ }
129
+
130
+ const updateResponse = await nodeFetch(routes.dataset(apiBaseUrl, datasetId), {
131
+ method: 'PUT',
132
+ headers: {
133
+ ...headers,
134
+ 'Content-Type': 'application/json',
135
+ },
136
+ body: JSON.stringify(updatePayload),
137
+ });
138
+
139
+ if (!updateResponse.ok) {
140
+ const errorText = await updateResponse.text();
141
+ const error = new Error(`Failed to update dataset metadata: ${updateResponse.status} ${updateResponse.statusText} - ${errorText}`);
142
+
143
+ error.statusCode = updateResponse.status;
144
+ throw error;
145
+ }
146
+
147
+ logger.info('Dataset metadata updated successfully');
148
+ }
149
+
150
+ export async function uploadResource({ apiBaseUrl, headers, datasetId, archivePath }) {
151
+ logger.info('Uploading dataset archive…');
152
+
153
+ const { formData, fileName } = createFormDataForFile(archivePath);
154
+
155
+ const uploadResponse = await nodeFetch(routes.datasetUpload(apiBaseUrl, datasetId), {
156
+ method: 'POST',
157
+ headers: { ...formData.getHeaders(), ...headers },
158
+ body: formData,
159
+ });
160
+
161
+ if (!uploadResponse.ok) {
162
+ const errorText = await uploadResponse.text();
163
+
164
+ throw new Error(`Failed to upload dataset file: ${uploadResponse.status} ${uploadResponse.statusText} - ${errorText}`);
165
+ }
166
+
167
+ const uploadResult = await uploadResponse.json();
168
+
169
+ logger.info(`Dataset file uploaded successfully with resource ID: ${uploadResult.id}`);
170
+
171
+ return { resourceId: uploadResult.id, fileName };
172
+ }
173
+
174
+ export async function replaceResourceFile({ apiBaseUrl, headers, datasetId, resourceId, archivePath }) {
175
+ logger.info(`Replacing file for existing resource ID: ${resourceId}…`);
176
+
177
+ const { formData, fileName } = createFormDataForFile(archivePath);
178
+
179
+ const uploadResponse = await nodeFetch(routes.resourceUpload(apiBaseUrl, datasetId, resourceId), {
180
+ method: 'POST',
181
+ headers: { ...formData.getHeaders(), ...headers },
182
+ body: formData,
183
+ });
184
+
185
+ if (!uploadResponse.ok) {
186
+ const errorText = await uploadResponse.text();
187
+
188
+ throw new Error(`Failed to replace resource file: ${uploadResponse.status} ${uploadResponse.statusText} - ${errorText}`);
189
+ }
190
+
191
+ const uploadResult = await uploadResponse.json();
192
+
193
+ logger.info('Resource file replaced successfully');
194
+
195
+ return { resourceId: uploadResult.id, fileName };
196
+ }
197
+
198
+ export async function updateResourceMetadata({ apiBaseUrl, headers, datasetId, resourceId, fileName }) {
199
+ logger.info('Updating resource metadata…');
200
+
201
+ const resourceUpdateResponse = await nodeFetch(routes.resource(apiBaseUrl, datasetId, resourceId), {
202
+ method: 'PUT',
203
+ headers: { ...headers, 'Content-Type': 'application/json' },
204
+ body: JSON.stringify({
205
+ title: fileName,
206
+ description: DEFAULT_RESOURCE_DESCRIPTION,
207
+ filetype: 'file',
208
+ format: 'zip',
209
+ mime: 'application/zip',
210
+ }),
211
+ });
212
+
213
+ if (!resourceUpdateResponse.ok) {
214
+ const errorText = await resourceUpdateResponse.text();
215
+
216
+ throw new Error(`Failed to update resource metadata: ${resourceUpdateResponse.status} ${resourceUpdateResponse.statusText} - ${errorText}`);
217
+ }
218
+
219
+ logger.info('Resource metadata updated successfully');
220
+ }
221
+
222
+ function createFormDataForFile(archivePath) {
223
+ const formData = new FormData();
224
+ const fileName = path.basename(archivePath);
225
+ const fileStats = fsApi.statSync(archivePath);
226
+
227
+ formData.append('file', fsApi.createReadStream(archivePath), {
228
+ filename: fileName,
229
+ contentType: 'application/zip',
230
+ knownLength: fileStats.size,
231
+ });
232
+
233
+ return { formData, fileName };
234
+ }
@@ -0,0 +1,82 @@
1
+ import config from 'config';
2
+
3
+ import * as readme from '../../assets/README.template.js';
4
+ import { createModuleLogger } from '../../logger/index.js';
5
+
6
+ import { updateDatasetMetadata, uploadResource, replaceResourceFile, updateResourceMetadata, getDataset, getOrganization, findDatasetByTitle, createDataset } from './dataset.js';
7
+
8
+ const logger = createModuleLogger('datagouv');
9
+
10
+ const PRODUCTION_API_BASE_URL = 'https://www.data.gouv.fr/api/1';
11
+ const DEMO_API_BASE_URL = 'https://demo.data.gouv.fr/api/1';
12
+ const DATASET_LICENSE = 'odc-odbl';
13
+
14
+ export default async function publish({ archivePath, stats }) {
15
+ const { datasetId, organizationIdOrSlug, apiBaseUrl, headers, datasetTitle, frequency } = loadConfiguration();
16
+ const description = readme.body(stats);
17
+
18
+ const dataset = datasetId
19
+ ? await getDataset({ apiBaseUrl, headers, datasetId })
20
+ : await ensureDatasetExists({ apiBaseUrl, headers, organizationIdOrSlug, datasetTitle, description, frequency });
21
+
22
+ await updateDatasetMetadata({ apiBaseUrl, headers, datasetId: dataset.id, title: datasetTitle, description, stats, frequency });
23
+
24
+ const { resourceId, fileName } = await handleResourceUpload({ apiBaseUrl, headers, datasetId: dataset.id, dataset, archivePath });
25
+
26
+ await updateResourceMetadata({ apiBaseUrl, headers, datasetId: dataset.id, resourceId, fileName });
27
+
28
+ logger.info(`Dataset published successfully: ${dataset.page}`);
29
+
30
+ return dataset.page;
31
+ }
32
+
33
+ function loadConfiguration() {
34
+ const apiKey = process.env.OTA_ENGINE_DATAGOUV_API_KEY;
35
+
36
+ if (!apiKey) {
37
+ throw new Error('OTA_ENGINE_DATAGOUV_API_KEY environment variable is required for data.gouv.fr publishing');
38
+ }
39
+
40
+ const datasetId = config.has('@opentermsarchive/engine.dataset.datagouv.datasetId') && config.get('@opentermsarchive/engine.dataset.datagouv.datasetId');
41
+ const organizationIdOrSlug = config.has('@opentermsarchive/engine.dataset.datagouv.organizationIdOrSlug') && config.get('@opentermsarchive/engine.dataset.datagouv.organizationIdOrSlug');
42
+
43
+ if (!datasetId && !organizationIdOrSlug) {
44
+ throw new Error('Either datasetId or organizationIdOrSlug is required in config at @opentermsarchive/engine.dataset.datagouv');
45
+ }
46
+
47
+ const datasetTitle = config.get('@opentermsarchive/engine.dataset.title');
48
+ const frequency = config.has('@opentermsarchive/engine.dataset.datagouv.frequency') && config.get('@opentermsarchive/engine.dataset.datagouv.frequency');
49
+ const useDemo = config.has('@opentermsarchive/engine.dataset.datagouv.useDemo') && config.get('@opentermsarchive/engine.dataset.datagouv.useDemo');
50
+ const apiBaseUrl = useDemo ? DEMO_API_BASE_URL : PRODUCTION_API_BASE_URL;
51
+
52
+ if (useDemo) {
53
+ logger.warn('Using demo.data.gouv.fr environment for testing');
54
+ }
55
+
56
+ const headers = { 'X-API-KEY': apiKey };
57
+
58
+ return { datasetId, organizationIdOrSlug, apiBaseUrl, headers, datasetTitle, frequency };
59
+ }
60
+
61
+ async function ensureDatasetExists({ apiBaseUrl, headers, organizationIdOrSlug, datasetTitle, description, frequency }) {
62
+ const organization = await getOrganization({ apiBaseUrl, headers, organizationIdOrSlug });
63
+ let dataset = await findDatasetByTitle({ apiBaseUrl, headers, organizationId: organization.id, title: datasetTitle });
64
+
65
+ if (!dataset) {
66
+ dataset = await createDataset({ apiBaseUrl, headers, organizationId: organization.id, title: datasetTitle, description, license: DATASET_LICENSE, frequency });
67
+ }
68
+
69
+ return dataset;
70
+ }
71
+
72
+ function handleResourceUpload({ apiBaseUrl, headers, datasetId, dataset, archivePath }) {
73
+ if (dataset?.resources?.length > 0) {
74
+ const existingResource = dataset.resources[0];
75
+
76
+ logger.info(`Found existing resource: ${existingResource.title} (ID: ${existingResource.id})`);
77
+
78
+ return replaceResourceFile({ apiBaseUrl, headers, datasetId, resourceId: existingResource.id, archivePath });
79
+ }
80
+
81
+ return uploadResource({ apiBaseUrl, headers, datasetId, archivePath });
82
+ }
@@ -1,19 +1,23 @@
1
1
  import fsApi from 'fs';
2
2
  import path from 'path';
3
- import url from 'url';
4
3
 
5
4
  import config from 'config';
6
5
  import { Octokit } from 'octokit'; // eslint-disable-line import/no-unresolved
7
6
 
8
7
  import * as readme from '../../assets/README.template.js';
8
+ import { createModuleLogger } from '../../logger/index.js';
9
+
10
+ const logger = createModuleLogger('github');
9
11
 
10
12
  export default async function publish({ archivePath, releaseDate, stats }) {
11
13
  const octokit = new Octokit({ auth: process.env.OTA_ENGINE_GITHUB_TOKEN });
12
14
 
13
- const [ owner, repo ] = url.parse(config.get('@opentermsarchive/engine.dataset.versionsRepositoryURL')).pathname.split('/').filter(component => component);
15
+ const [ owner, repo ] = new URL(config.get('@opentermsarchive/engine.dataset.versionsRepositoryURL')).pathname.split('/').filter(component => component);
14
16
 
15
17
  const tagName = `${path.basename(archivePath, path.extname(archivePath))}`; // use archive filename as Git tag
16
18
 
19
+ logger.info(`Creating release for ${owner}/${repo}…`);
20
+
17
21
  const { data: { upload_url: uploadUrl, html_url: releaseUrl } } = await octokit.rest.repos.createRelease({
18
22
  owner,
19
23
  repo,
@@ -22,6 +26,9 @@ export default async function publish({ archivePath, releaseDate, stats }) {
22
26
  body: readme.body(stats),
23
27
  });
24
28
 
29
+ logger.info(`Release created successfully with tag: ${tagName}`);
30
+ logger.info('Uploading release asset…');
31
+
25
32
  await octokit.rest.repos.uploadReleaseAsset({
26
33
  data: fsApi.readFileSync(archivePath),
27
34
  headers: {
@@ -32,5 +39,7 @@ export default async function publish({ archivePath, releaseDate, stats }) {
32
39
  url: uploadUrl,
33
40
  });
34
41
 
42
+ logger.info(`Release asset uploaded successfully: ${path.basename(archivePath)}`);
43
+
35
44
  return releaseUrl;
36
45
  }
@@ -8,7 +8,9 @@ import nodeFetch from 'node-fetch';
8
8
 
9
9
  import GitLab from '../../../../src/reporter/gitlab/index.js';
10
10
  import * as readme from '../../assets/README.template.js';
11
- import logger from '../../logger/index.js';
11
+ import { createModuleLogger } from '../../logger/index.js';
12
+
13
+ const logger = createModuleLogger('gitlab');
12
14
 
13
15
  dotenv.config({ quiet: true });
14
16
 
@@ -1,15 +1,49 @@
1
+ import config from 'config';
2
+
3
+ import logger from '../logger/index.js';
4
+
5
+ import publishDataGouv from './datagouv/index.js';
1
6
  import publishGitHub from './github/index.js';
2
7
  import publishGitLab from './gitlab/index.js';
3
8
 
4
- export default function publishRelease({ archivePath, releaseDate, stats }) {
9
+ export default async function publishRelease({ archivePath, releaseDate, stats }) {
10
+ const platforms = [];
11
+
5
12
  // If both GitHub and GitLab tokens are defined, GitHub takes precedence
6
13
  if (process.env.OTA_ENGINE_GITHUB_TOKEN) {
7
- return publishGitHub({ archivePath, releaseDate, stats });
14
+ platforms.push({ name: 'GitHub', publish: () => publishGitHub({ archivePath, releaseDate, stats }) });
15
+ } else if (process.env.OTA_ENGINE_GITLAB_TOKEN) {
16
+ platforms.push({ name: 'GitLab', publish: () => publishGitLab({ archivePath, releaseDate, stats }) });
17
+ }
18
+
19
+ if (process.env.OTA_ENGINE_DATAGOUV_API_KEY && (config.has('@opentermsarchive/engine.dataset.datagouv.datasetId') || config.has('@opentermsarchive/engine.dataset.datagouv.organizationIdOrSlug'))) {
20
+ platforms.push({ name: 'data.gouv.fr', publish: () => publishDataGouv({ archivePath, releaseDate, stats }) });
21
+ }
22
+
23
+ if (!platforms.length) {
24
+ throw new Error('No publishing platform configured. Please configure at least one of: GitHub (OTA_ENGINE_GITHUB_TOKEN), GitLab (OTA_ENGINE_GITLAB_TOKEN), or data.gouv.fr (OTA_ENGINE_DATAGOUV_API_KEY + datasetId or organizationIdOrSlug in config).');
8
25
  }
9
26
 
10
- if (process.env.OTA_ENGINE_GITLAB_TOKEN) {
11
- return publishGitLab({ archivePath, releaseDate, stats });
27
+ const results = await Promise.allSettled(platforms.map(async platform => {
28
+ const url = await platform.publish();
29
+
30
+ return { platform: platform.name, url };
31
+ }));
32
+
33
+ const succeeded = results.filter(result => result.status === 'fulfilled');
34
+ const failed = results.filter(result => result.status === 'rejected');
35
+
36
+ if (failed.length) {
37
+ let errorMessage = !succeeded.length ? 'All platforms failed to publish:' : 'Some platforms failed to publish:';
38
+
39
+ failed.forEach(rejectedResult => {
40
+ const index = results.indexOf(rejectedResult);
41
+
42
+ errorMessage += `\n - ${platforms[index].name}: ${rejectedResult.reason.message}`;
43
+ });
44
+
45
+ logger.error(errorMessage);
12
46
  }
13
47
 
14
- throw new Error('No GitHub nor GitLab token found in environment variables (OTA_ENGINE_GITHUB_TOKEN or OTA_ENGINE_GITLAB_TOKEN). Cannot publish the dataset without authentication.');
48
+ return succeeded.map(result => result.value);
15
49
  }