@opentermsarchive/engine 2.7.0 → 2.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/scripts/dataset/logger/index.js +1 -1
- package/src/archivist/index.js +11 -1
- package/src/archivist/recorder/repositories/git/git.js +23 -7
- package/src/archivist/recorder/repositories/git/index.js +12 -6
- package/src/collection-api/logger.js +1 -1
- package/src/logger/index.js +13 -3
- package/src/logger/utils.js +22 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@opentermsarchive/engine",
|
|
3
|
-
"version": "2.7.0",
|
|
3
|
+
"version": "2.7.2",
|
|
4
4
|
"description": "Tracks and makes visible changes to the terms of online services",
|
|
5
5
|
"homepage": "https://opentermsarchive.org",
|
|
6
6
|
"bugs": {
|
|
@@ -96,7 +96,7 @@
|
|
|
96
96
|
"puppeteer-extra": "^3.3.6",
|
|
97
97
|
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
|
98
98
|
"sib-api-v3-sdk": "^8.2.1",
|
|
99
|
-
"simple-git": "^3.
|
|
99
|
+
"simple-git": "^3.27.0",
|
|
100
100
|
"swagger-jsdoc": "^6.2.8",
|
|
101
101
|
"swagger-ui-express": "^5.0.0",
|
|
102
102
|
"winston": "^3.9.0",
|
|
package/scripts/dataset/logger/index.js
CHANGED
|
@@ -7,7 +7,7 @@ const { combine, timestamp, printf, colorize } = winston.format;
|
|
|
7
7
|
|
|
8
8
|
logger.format = combine(
|
|
9
9
|
colorize(),
|
|
10
|
-
timestamp({ format: 'YYYY-MM-DDTHH:
|
|
10
|
+
timestamp({ format: 'YYYY-MM-DDTHH:mm:ssZ' }),
|
|
11
11
|
printf(({ level, message, counter, hash, timestamp }) => {
|
|
12
12
|
const prefix = counter && hash ? `${counter.toString().padEnd(6)} ${hash.padEnd(40)}` : '';
|
|
13
13
|
|
package/src/archivist/index.js
CHANGED
|
@@ -28,6 +28,7 @@ export const EVENTS = [
|
|
|
28
28
|
'trackingStarted',
|
|
29
29
|
'trackingCompleted',
|
|
30
30
|
'inaccessibleContent',
|
|
31
|
+
'info',
|
|
31
32
|
'error',
|
|
32
33
|
'pluginError',
|
|
33
34
|
];
|
|
@@ -45,6 +46,7 @@ export default class Archivist extends events.EventEmitter {
|
|
|
45
46
|
}
|
|
46
47
|
|
|
47
48
|
async initialize() {
|
|
49
|
+
this.emit('info', 'Initializing engine…');
|
|
48
50
|
if (this.services) {
|
|
49
51
|
return;
|
|
50
52
|
}
|
|
@@ -67,6 +69,8 @@ export default class Archivist extends events.EventEmitter {
|
|
|
67
69
|
process.exit(1);
|
|
68
70
|
});
|
|
69
71
|
|
|
72
|
+
this.emit('info', 'Initialization completed');
|
|
73
|
+
|
|
70
74
|
return this;
|
|
71
75
|
}
|
|
72
76
|
|
|
@@ -140,7 +144,13 @@ export default class Archivist extends events.EventEmitter {
|
|
|
140
144
|
return;
|
|
141
145
|
}
|
|
142
146
|
|
|
143
|
-
|
|
147
|
+
await this.recordVersion(terms, extractOnly);
|
|
148
|
+
|
|
149
|
+
terms.sourceDocuments.forEach(sourceDocument => {
|
|
150
|
+
sourceDocument.content = null; // Reduce memory usage by clearing no longer needed large content strings
|
|
151
|
+
sourceDocument.mimeType = null; // …and associated MIME type
|
|
152
|
+
sourceDocument.snapshotId = null; // …and associated snapshot ID for consistency
|
|
153
|
+
});
|
|
144
154
|
}
|
|
145
155
|
|
|
146
156
|
async fetchSourceDocuments(terms) {
|
|
package/src/archivist/recorder/repositories/git/git.js
CHANGED
|
@@ -18,7 +18,10 @@ export default class Git {
|
|
|
18
18
|
await fs.mkdir(this.path, { recursive: true });
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
-
this.git = simpleGit(this.path, {
|
|
21
|
+
this.git = simpleGit(this.path, {
|
|
22
|
+
trimmed: true,
|
|
23
|
+
maxConcurrentProcesses: 1,
|
|
24
|
+
});
|
|
22
25
|
|
|
23
26
|
await this.git.init();
|
|
24
27
|
|
|
@@ -27,7 +30,8 @@ export default class Git {
|
|
|
27
30
|
.addConfig('push.default', 'current')
|
|
28
31
|
.addConfig('user.name', this.author.name)
|
|
29
32
|
.addConfig('user.email', this.author.email)
|
|
30
|
-
.addConfig('core.quotePath', false)
|
|
33
|
+
.addConfig('core.quotePath', false) // Disable Git's encoding of special characters in pathnames. For example, `service·A` will be encoded as `service\302\267A` without this setting, leading to issues. See https://git-scm.com/docs/git-config#Documentation/git-config.txt-corequotePath
|
|
34
|
+
.addConfig('core.commitGraph', true); // Enable `commit-graph` feature for efficient commit data storage, improving performance of operations like `git log`
|
|
31
35
|
}
|
|
32
36
|
|
|
33
37
|
add(filePath) {
|
|
@@ -42,7 +46,7 @@ export default class Git {
|
|
|
42
46
|
process.env.GIT_AUTHOR_DATE = commitDate;
|
|
43
47
|
process.env.GIT_COMMITTER_DATE = commitDate;
|
|
44
48
|
|
|
45
|
-
summary = await this.git.commit(message, filePath);
|
|
49
|
+
summary = await this.git.commit(message, filePath, ['--no-verify']); // Skip pre-commit and commit-msg hooks, as commits are programmatically managed, to optimize performance
|
|
46
50
|
} finally {
|
|
47
51
|
process.env.GIT_AUTHOR_DATE = '';
|
|
48
52
|
process.env.GIT_COMMITTER_DATE = '';
|
|
@@ -60,11 +64,11 @@ export default class Git {
|
|
|
60
64
|
}
|
|
61
65
|
|
|
62
66
|
listCommits(options = []) {
|
|
63
|
-
return this.log([ '--reverse', '--no-merges', '--name-only', ...options ]);
|
|
67
|
+
return this.log([ '--reverse', '--no-merges', '--name-only', ...options ]); // Returns all commits in chronological order (`--reverse`), excluding merge commits (`--no-merges`), with modified files names (`--name-only`)
|
|
64
68
|
}
|
|
65
69
|
|
|
66
70
|
async getCommit(options) {
|
|
67
|
-
const [commit] = await this.listCommits([ '-1', ...options ]);
|
|
71
|
+
const [commit] = await this.listCommits([ '-1', ...options ]); // Returns only the most recent commit matching the given options
|
|
68
72
|
|
|
69
73
|
return commit;
|
|
70
74
|
}
|
|
@@ -103,8 +107,8 @@ export default class Git {
|
|
|
103
107
|
return this.git.clean('f', '-d');
|
|
104
108
|
}
|
|
105
109
|
|
|
106
|
-
|
|
107
|
-
return
|
|
110
|
+
getFullHash(shortHash) {
|
|
111
|
+
return this.git.show([ shortHash, '--pretty=%H', '-s' ]);
|
|
108
112
|
}
|
|
109
113
|
|
|
110
114
|
restore(path, commit) {
|
|
@@ -120,4 +124,16 @@ export default class Git {
|
|
|
120
124
|
relativePath(absolutePath) {
|
|
121
125
|
return path.relative(this.path, absolutePath); // Git needs a path relative to the .git directory, not an absolute one
|
|
122
126
|
}
|
|
127
|
+
|
|
128
|
+
async listFiles(path) {
|
|
129
|
+
return (await this.git.raw([ 'ls-files', path ])).split('\n');
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
async writeCommitGraph() {
|
|
133
|
+
await this.git.raw([ 'commit-graph', 'write', '--reachable', '--changed-paths' ]);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
async updateCommitGraph() {
|
|
137
|
+
await this.git.raw([ 'commit-graph', 'write', '--reachable', '--changed-paths', '--append' ]);
|
|
138
|
+
}
|
|
123
139
|
}
|
|
package/src/archivist/recorder/repositories/git/index.js
CHANGED
|
@@ -29,6 +29,7 @@ export default class GitRepository extends RepositoryInterface {
|
|
|
29
29
|
async initialize() {
|
|
30
30
|
await this.git.initialize();
|
|
31
31
|
await this.git.cleanUp(); // Drop all uncommitted changes and remove all leftover files that may be present if the process was killed aggressively
|
|
32
|
+
await this.git.writeCommitGraph(); // Create or replace the commit graph with a new one to ensure it's fully consistent
|
|
32
33
|
|
|
33
34
|
return this;
|
|
34
35
|
}
|
|
@@ -56,17 +57,22 @@ export default class GitRepository extends RepositoryInterface {
|
|
|
56
57
|
return record;
|
|
57
58
|
}
|
|
58
59
|
|
|
59
|
-
finalize() {
|
|
60
|
-
if (
|
|
61
|
-
|
|
60
|
+
async finalize() {
|
|
61
|
+
if (this.needsPublication) {
|
|
62
|
+
await this.git.pushChanges();
|
|
62
63
|
}
|
|
63
64
|
|
|
64
|
-
return this.git.
|
|
65
|
+
return this.git.updateCommitGraph();
|
|
65
66
|
}
|
|
66
67
|
|
|
67
68
|
async findLatest(serviceId, termsType, documentId) {
|
|
68
|
-
const
|
|
69
|
-
|
|
69
|
+
const matchingFilesPaths = await this.git.listFiles(DataMapper.generateFilePath(serviceId, termsType, documentId));
|
|
70
|
+
|
|
71
|
+
if (!matchingFilesPaths.length) {
|
|
72
|
+
return null;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
const commit = await this.git.getCommit([...matchingFilesPaths]); // Returns the most recent commit that modified any of the matching files. If multiple files match the path pattern (e.g. both HTML and PDF versions exist), returns the commit that last modified any of them
|
|
70
76
|
|
|
71
77
|
return this.#toDomain(commit);
|
|
72
78
|
}
|
|
package/src/collection-api/logger.js
CHANGED
|
@@ -30,7 +30,7 @@ if (config.get('@opentermsarchive/engine.logger.sendMailOnError')) {
|
|
|
30
30
|
const logger = winston.createLogger({
|
|
31
31
|
format: combine(
|
|
32
32
|
colorize(),
|
|
33
|
-
timestamp({ format: 'YYYY-MM-DDTHH:
|
|
33
|
+
timestamp({ format: 'YYYY-MM-DDTHH:mm:ssZ' }),
|
|
34
34
|
printf(({ level, message, timestamp }) => {
|
|
35
35
|
const timestampPrefix = config.get('@opentermsarchive/engine.logger.timestampPrefix') ? `${timestamp} ` : '';
|
|
36
36
|
|
package/src/logger/index.js
CHANGED
|
@@ -4,11 +4,13 @@ import config from 'config';
|
|
|
4
4
|
import winston from 'winston';
|
|
5
5
|
import 'winston-mail';
|
|
6
6
|
|
|
7
|
+
import { formatDuration } from './utils.js';
|
|
8
|
+
|
|
7
9
|
const { combine, timestamp, printf, colorize } = winston.format;
|
|
8
10
|
|
|
9
11
|
const alignedWithColorsAndTime = combine(
|
|
10
12
|
colorize(),
|
|
11
|
-
timestamp({ format: 'YYYY-MM-DDTHH:
|
|
13
|
+
timestamp({ format: 'YYYY-MM-DDTHH:mm:ssZ' }),
|
|
12
14
|
printf(({ level, message, timestamp, serviceId, termsType, documentId }) => {
|
|
13
15
|
const servicePrefix = serviceId && termsType
|
|
14
16
|
? `${serviceId} — ${termsType}${documentId ? `:${documentId}` : ''}`
|
|
@@ -82,6 +84,7 @@ logger.configure({
|
|
|
82
84
|
|
|
83
85
|
let recordedSnapshotsCount;
|
|
84
86
|
let recordedVersionsCount;
|
|
87
|
+
let trackingStartTime;
|
|
85
88
|
|
|
86
89
|
logger.onFirstSnapshotRecorded = ({ serviceId, termsType, documentId, id }) => {
|
|
87
90
|
logger.info({ message: `Recorded first snapshot with id ${id}`, serviceId, termsType, documentId });
|
|
@@ -119,14 +122,17 @@ logger.onTrackingStarted = (numberOfServices, numberOfTerms, extractOnly) => {
|
|
|
119
122
|
}
|
|
120
123
|
recordedSnapshotsCount = 0;
|
|
121
124
|
recordedVersionsCount = 0;
|
|
125
|
+
trackingStartTime = Date.now();
|
|
122
126
|
};
|
|
123
127
|
|
|
124
128
|
logger.onTrackingCompleted = (numberOfServices, numberOfTerms, extractOnly) => {
|
|
129
|
+
const duration = formatDuration(Date.now() - trackingStartTime);
|
|
130
|
+
|
|
125
131
|
if (extractOnly) {
|
|
126
|
-
logger.info(`Examined ${numberOfTerms} terms from ${numberOfServices} services for extraction`);
|
|
132
|
+
logger.info(`Examined ${numberOfTerms} terms from ${numberOfServices} services for extraction in ${duration}`);
|
|
127
133
|
logger.info(`Recorded ${recordedVersionsCount} new versions\n`);
|
|
128
134
|
} else {
|
|
129
|
-
logger.info(`Tracked changes of ${numberOfTerms} terms from ${numberOfServices} services`);
|
|
135
|
+
logger.info(`Tracked changes of ${numberOfTerms} terms from ${numberOfServices} services in ${duration}`);
|
|
130
136
|
logger.info(`Recorded ${recordedSnapshotsCount} new snapshots and ${recordedVersionsCount} new versions\n`);
|
|
131
137
|
}
|
|
132
138
|
};
|
|
@@ -139,6 +145,10 @@ logger.onError = (error, terms) => {
|
|
|
139
145
|
logger.error({ message: error.stack, serviceId: terms.service.id, termsType: terms.type });
|
|
140
146
|
};
|
|
141
147
|
|
|
148
|
+
logger.onInfo = message => {
|
|
149
|
+
logger.info({ message });
|
|
150
|
+
};
|
|
151
|
+
|
|
142
152
|
logger.onPluginError = (error, pluginName) => {
|
|
143
153
|
logger.error({ message: `Error in "${pluginName}" plugin: ${error.stack}` });
|
|
144
154
|
};
|
|
package/src/logger/utils.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
export const formatDuration = milliseconds => {
|
|
2
|
+
const seconds = Math.floor(milliseconds / 1000);
|
|
3
|
+
const hours = Math.floor(seconds / 3600);
|
|
4
|
+
const minutes = Math.floor((seconds % 3600) / 60);
|
|
5
|
+
const remainingSeconds = seconds % 60;
|
|
6
|
+
|
|
7
|
+
const parts = [];
|
|
8
|
+
|
|
9
|
+
if (hours > 0) {
|
|
10
|
+
parts.push(`${hours} hour${hours > 1 ? 's' : ''}`);
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
if (minutes > 0) {
|
|
14
|
+
parts.push(`${minutes} minute${minutes > 1 ? 's' : ''}`);
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
if (remainingSeconds > 0 || parts.length === 0) {
|
|
18
|
+
parts.push(`${remainingSeconds} second${remainingSeconds !== 1 ? 's' : ''}`);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
return parts.join(' and ');
|
|
22
|
+
};
|