@l10nmonster/helpers-lqaboss 3.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # @l10nmonster/helpers-lqaboss
2
+
3
+ Helper module for L10n Monster that provides LQA Boss flow capture functionality.
4
+
5
+ ## Features
6
+
7
+ - Captures web pages with screenshots and text metadata
8
+ - Creates `.lqaboss` files containing flow data
9
+ - Supports multi-page flows with interactive capture
10
+
11
+ ## Usage
12
+
13
+ This module exports `LQABossActions` (whose `subActions` list contains the capture action) and `LQABossProvider`:
14
+
15
+ ```javascript
16
+ import { LQABossActions } from '@l10nmonster/helpers-lqaboss';
17
+
18
+ // Use the lqaboss capture sub-action
19
+ await LQABossActions.subActions[0].action(mm, {
20
+ url: 'https://example.com',
21
+ flowName: 'my-flow'
22
+ });
23
+ ```
24
+
25
+ ## Testing
26
+
27
+ This module includes unit tests using Node.js built-in testing facilities.
28
+
29
+ ### Running Tests
30
+
31
+ ```bash
32
+ npm test
33
+ ```
34
+
35
+ ### Test Structure
36
+
37
+ The tests are located in `test/index.test.js` and cover:
38
+
39
+ - Help structure validation
40
+ - Action method interface verification
41
+ - Filename sanitization logic
42
+ - Module exports validation
43
+
44
+ ### Test Requirements
45
+
46
+ - Node.js 18+ (for built-in test runner)
47
+ - No additional test dependencies required
48
+
49
+ ## Dependencies
50
+
51
+ - `puppeteer` - For browser automation and page capture
52
+ - `jszip` - For creating compressed flow files
53
+ - `@l10nmonster/core` - Core L10n Monster functionality
package/flowCapture.js ADDED
@@ -0,0 +1,238 @@
1
+ import JSZip from 'jszip';
2
+ import puppeteer from 'puppeteer';
3
+ import { logInfo, logVerbose } from '@l10nmonster/core';
4
+
5
+ // --- Function to be evaluated in browser context ---
6
/**
 * Collects the marked-up text segments found in the current document.
 *
 * A segment starts with a zero-width space (U+200B) followed by a run of
 * variation selectors (U+FE00..U+FE0F) that encode a UTF-8 JSON metadata
 * string, two selectors (high nibble, low nibble) per byte, and ends at the
 * next zero-width space. A segment may span several text nodes.
 *
 * This function is handed to `page.evaluate`, so it has to be fully
 * self-contained: every helper it needs is nested inside it.
 *
 * @returns {Promise<{textElements: object[]}|{error: string}>} The captured
 *   segments (text, page-relative x/y, width, height, plus the decoded
 *   metadata properties), or an `error` when the document has no body.
 */
async function extractTextAndMetadataInPageContext() {
    const START_MARKER_REGEX = /(?<![''<])\u200B([\uFE00-\uFE0F]+)/g;
    const END_MARKER = '\u200B';
    const SKIPPED_TAGS = ['SCRIPT', 'STYLE', 'NOSCRIPT', 'TEXTAREA', 'HEAD'];
    const textElements = [];

    // Turns a run of U+FE00..U+FE0F characters (one nibble each) back into a UTF-8 string.
    function fe00RangeToUtf8_browser(encoded) {
        const encodingOffset = 0xfe00;
        const decoder = new TextDecoder();
        const length = encoded.length;
        if (length % 2 !== 0) throw new Error('Invalid fe00 encoded input length');
        const bytes = new Uint8Array(length / 2);
        let byteIndex = 0;
        for (let i = 0; i < length; i += 2) {
            const highNibble = encoded.charCodeAt(i) - encodingOffset;
            const lowNibble = encoded.charCodeAt(i + 1) - encodingOffset;
            if (highNibble < 0 || highNibble > 15 || lowNibble < 0 || lowNibble > 15) {
                throw new Error('Invalid char code in fe00 encoded input');
            }
            bytes[byteIndex++] = (highNibble << 4) | lowNibble;
        }
        return decoder.decode(bytes);
    }

    // Decodes and parses the metadata; failures are reported in-band as `decodingError`.
    function decodeMetadata(encodedMetadata) {
        let parsedMetadata = {};
        try {
            const decodedJsonMetadata = fe00RangeToUtf8_browser(encodedMetadata);
            if (decodedJsonMetadata && decodedJsonMetadata.trim() !== '') {
                parsedMetadata = JSON.parse(decodedJsonMetadata);
            }
        } catch (e) {
            parsedMetadata.decodingError = e.message;
        }
        return parsedMetadata;
    }

    // Records a finished segment, unless its range has no rendered area at all.
    function recordSegment(text, range, encodedMetadata) {
        const rect = range.getBoundingClientRect();
        if (rect.width > 0 || rect.height > 0) {
            textElements.push({
                text,
                x: rect.left + window.scrollX,
                y: rect.top + window.scrollY,
                width: rect.width,
                height: rect.height,
                ...decodeMetadata(encodedMetadata),
            });
        }
    }

    if (!document.body) {
        return { error: 'Document body not found.' };
    }

    const treeWalker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null);

    // Segment whose start marker was seen but whose end marker has not been reached yet.
    let activeSegment = null;

    for (let node = treeWalker.nextNode(); node; node = treeWalker.nextNode()) {
        const parentElement = node.parentElement;
        if (!parentElement) continue;
        const styles = window.getComputedStyle(parentElement);
        if (styles.display === 'none' || styles.visibility === 'hidden' || parseFloat(styles.opacity) === 0) continue;
        if (SKIPPED_TAGS.includes(parentElement.tagName)) continue;

        const text = node.nodeValue;
        let searchPos = 0;

        while (searchPos < text.length) {
            if (activeSegment) {
                // Continue a segment opened in an earlier text node.
                const endMarkerPos = text.indexOf(END_MARKER, searchPos);
                if (endMarkerPos === -1) {
                    activeSegment.text += text.substring(searchPos);
                    break;
                }
                activeSegment.text += text.substring(searchPos, endMarkerPos);

                const range = document.createRange();
                range.setStart(activeSegment.startNode, activeSegment.startOffset);
                range.setEnd(node, endMarkerPos);
                recordSegment(activeSegment.text, range, activeSegment.encodedMetadata);

                searchPos = endMarkerPos + 1;
                activeSegment = null;
                continue;
            }

            // The regex is global, so lastIndex must be positioned before each exec.
            START_MARKER_REGEX.lastIndex = searchPos;
            const match = START_MARKER_REGEX.exec(text);
            if (!match) break;

            const contentStart = match.index + match[0].length;
            const endOffset = text.indexOf(END_MARKER, contentStart);
            if (endOffset === -1) {
                // The end marker lives in a later text node: remember where the segment began.
                activeSegment = {
                    startNode: node,
                    startOffset: match.index,
                    encodedMetadata: match[1],
                    text: text.substring(contentStart),
                };
                break;
            }

            const range = document.createRange();
            range.setStart(node, match.index);
            range.setEnd(node, endOffset);
            recordSegment(text.substring(contentStart, endOffset), range, match[1]);
            searchPos = endOffset + 1;
        }
    }
    return { textElements };
}
141
+
142
/**
 * Captures a multi-page flow in a Puppeteer-driven browser and packages it
 * (screenshots plus extracted text segments) into a zip buffer.
 *
 * Typical use: `startFlow()`, then `capturePage()` once per page of
 * interest, then `endFlow()` to close the browser and obtain the archive.
 */
export class FlowSnapshotter {
    /**
     * @param {string} startUrl - URL the browser navigates to when the flow starts.
     * @param {string} flowNameBase - Name stored as `flowName` in the flow metadata.
     * @param {object} [options]
     * @param {boolean} [options.headless=false] - Passed to `puppeteer.launch`.
     */
    constructor(startUrl, flowNameBase, options = {}) {
        this.browser = null;
        this.page = null;
        this.startUrl = startUrl;
        this.flowNameBase = flowNameBase;
        this.headless = options.headless ?? false;
        this.capturedPagesData = [];
        this.pageCounter = 0;
    }

    /**
     * Launches the browser and navigates to the start URL.
     * If the page cannot be opened or the navigation fails, the browser is
     * closed again before the error is rethrown, so nothing is left running.
     * @returns {Promise<void>}
     */
    async startFlow() {
        this.browser = await puppeteer.launch({ headless: this.headless, defaultViewport: null });
        try {
            this.page = await this.browser.newPage();
            await this.page.goto(this.startUrl, { waitUntil: 'networkidle2', timeout: 60000 });
        } catch (error) {
            const failedBrowser = this.browser;
            this.browser = null;
            this.page = null;
            try {
                await failedBrowser.close();
            } catch {
                // The navigation error is the one worth reporting; a failure to close must not mask it.
            }
            throw error;
        }
    }

    /**
     * Takes a full-page PNG screenshot of the current page and extracts its
     * text segments, then stores the result in `capturedPagesData`.
     * @returns {Promise<object>} The captured page record.
     * @throws {Error} If the flow was not started or the text extraction reports an error.
     */
    async capturePage() {
        if (!this.browser || !this.page) {
            throw new Error('Browser not started. Call startFlow first.');
        }

        await this.page.bringToFront();

        const screenshotBuffer = await this.page.screenshot({
            fullPage: true,
            type: 'png',
        });

        const currentPageUrl = this.page.url();
        const textDataResult = await this.page.evaluate(extractTextAndMetadataInPageContext);
        if (textDataResult.error) {
            throw new Error(`Error extracting text data: ${textDataResult.error}`);
        } else if (!textDataResult.textElements || textDataResult.textElements.length === 0) {
            logInfo`No LQA metadata segments found on page ${currentPageUrl}`;
        }
        logVerbose`Page ${this.pageCounter} captured: ${currentPageUrl}`;
        const capturedData = {
            url: currentPageUrl,
            timestamp: new Date().toISOString(),
            screenshotBuffer,
            text_content: textDataResult.textElements || [],
            id: `page_${Date.now()}_${this.pageCounter++}`,
        };
        this.capturedPagesData.push(capturedData);
        return capturedData;
    }

    /**
     * Closes the browser (if open) and builds the flow archive.
     *
     * The archive holds one PNG per captured page and `flow_metadata.json`.
     * When `tm` is given, every segment guid (`g` property) is looked up with
     * `tm.getEntryByGuid` and the entries found are written to `job.json`.
     *
     * @param {object} [tm] - Optional translation memory exposing `sourceLang`,
     *   `targetLang` and `getEntryByGuid(guid)`.
     * @returns {Promise<Buffer|null>} The zip buffer, or `null` when no page was captured.
     */
    async endFlow(tm) {
        if (this.browser) {
            await this.browser.close();
            logVerbose`Browser closed.`;
            this.browser = null;
            this.page = null;
        }
        if (this.capturedPagesData.length === 0) {
            return null;
        }
        logVerbose`Total pages in flow: ${this.capturedPagesData.length}`;
        const zip = new JSZip();
        const flowMetadata = {
            flowName: this.flowNameBase,
            createdAt: new Date().toISOString(),
            pages: [],
        };
        const guids = new Set();
        this.capturedPagesData.forEach((pData, index) => {
            const imageName = `page_${index + 1}_${pData.id}.png`;
            zip.file(imageName, pData.screenshotBuffer);
            flowMetadata.pages.push({
                pageId: pData.id,
                originalUrl: pData.url,
                timestamp: pData.timestamp,
                imageFile: imageName,
                segments: pData.text_content,
            });
            for (const segment of pData.text_content) {
                if (segment.g) {
                    guids.add(segment.g);
                }
            }
        });
        zip.file('flow_metadata.json', JSON.stringify(flowMetadata, null, 2));
        if (tm) {
            const job = {
                sourceLang: tm.sourceLang,
                targetLang: tm.targetLang,
                tus: [],
            };
            for (const guid of guids) {
                const tu = tm.getEntryByGuid(guid);
                if (tu) {
                    job.tus.push(tu);
                }
            }
            if (job.tus.length > 0) {
                zip.file('job.json', JSON.stringify(job, null, 2));
            }
            logVerbose`${guids.size} guids captured in flow, ${job.tus.length} found in TM`;
        }
        return await zip.generateAsync({ type: 'nodebuffer', compression: 'DEFLATE', compressionOptions: { level: 6 } });
    }
}
package/index.js ADDED
@@ -0,0 +1,2 @@
1
+ export { LQABossActions } from './lqabossActions.js';
2
+ export { LQABossProvider } from './lqabossProvider.js';
@@ -0,0 +1,10 @@
1
+ import { lqaboss_capture } from './lqabossCapture.js';
2
+
3
/**
 * Action group descriptor for the LQA Boss integration: a name, a help
 * description and the list of sub-action classes it contains.
 */
export class LQABossActions {
    /** Name of the action group. */
    static name = 'lqaboss';

    /** Help metadata for the action group. */
    static help = { description: 'Actions to integrate with LQA Boss.' };

    /** Sub-action classes belonging to this group. */
    static subActions = [
        lqaboss_capture,
    ];
}
@@ -0,0 +1,97 @@
1
+ import fs from 'fs';
2
+ import readline from 'readline';
3
+ import { consoleLog } from '@l10nmonster/core';
4
+ import { FlowSnapshotter } from './flowCapture.js';
5
+
6
/**
 * Action that runs an interactive capture session and saves the result as
 * a `.lqaboss` file in the current working directory.
 */
export class lqaboss_capture {
    static help = {
        description: 'create an lqaboss flow.',
        arguments: [
            ['<url>', 'the url of the page to capture'],
            ['<flowName>', 'the name of the flow'],
        ],
        options: [
            [ '--lang <srcLang,tgtLang>', 'source and target language pair' ],
        ],
    };

    /**
     * @param {object} mm - Object exposing `tmm.getTM(sourceLang, targetLang)`; only used when `options.lang` is set.
     * @param {object} options
     * @param {string} options.url - URL of the first page to open.
     * @param {string} options.flowName - Flow name; also used (sanitized) as the output file name.
     * @param {string|string[]} [options.lang] - `"src,tgt"` string or `[src, tgt]` array.
     * @returns {Promise<void>}
     * @throws {Error} If url/flowName are missing or the language pair is incomplete.
     */
    static async action(mm, options) {
        if (!options.url || !options.flowName) {
            throw new Error('You must specify a url and a flowName');
        }
        let tm;
        if (options.lang) {
            const langPair = Array.isArray(options.lang) ? options.lang : options.lang.split(',');
            const [ sourceLang, targetLang ] = langPair;
            // Fail early instead of asking for a TM with an undefined language.
            if (!sourceLang || !targetLang) {
                throw new Error('You must specify both source and target languages (e.g. --lang en,it)');
            }
            tm = mm.tmm.getTM(sourceLang, targetLang);
        }
        // Run the capture flow
        const lqaBossBuffer = await runCapture(options.url, options.flowName, tm);
        if (lqaBossBuffer) {
            // Keep only characters that are safe in a file name.
            const filename = `${options.flowName.replace(/[^a-z0-9_.-]/gi, '_')}.lqaboss`;
            await fs.promises.writeFile(filename, lqaBossBuffer);
            consoleLog`Flow successfully saved as ${filename}`;
        } else {
            consoleLog`No pages were captured. Nothing to save.`;
        }
    }
}
39
+
40
/**
 * Runs an interactive capture session: opens the browser on `startUrl`,
 * then captures the current page each time the user presses ENTER until the
 * user types `q` (or stdin is closed), and finally packages the flow.
 *
 * @param {string} startUrl - URL to open first.
 * @param {string} flowNameBase - Name of the flow.
 * @param {object} [tm] - Optional translation memory forwarded to `endFlow`.
 * @returns {Promise<Buffer|null>} The flow archive, or `null` when nothing
 *   was captured or the session failed.
 */
async function runCapture(startUrl, flowNameBase, tm) {
    const snapShotter = new FlowSnapshotter(startUrl, flowNameBase);
    let rl;
    try {
        consoleLog`Navigating to ${startUrl}...`;
        await snapShotter.startFlow();
        consoleLog`----------------------------------------------------`;
        consoleLog`  LQA Boss CLI Capture Mode`;
        consoleLog`----------------------------------------------------`;
        consoleLog`  - Interact with the opened Chrome window to navigate.`;
        consoleLog`  - Return to this terminal to issue commands.`;
        consoleLog`  - Press ENTER to capture the current page.`;
        consoleLog`  - Type 'q' then ENTER to finish and save the flow.`;
        consoleLog`----------------------------------------------------`;
        rl = readline.createInterface({
            input: process.stdin,
            output: process.stdout,
            prompt: '> ',
        });
        const prompter = rl;
        // If stdin is closed while waiting (e.g. Ctrl+D), answer as if the user quit;
        // otherwise the promise would never settle.
        const question = (query) => new Promise((resolve) => {
            const onClose = () => resolve('q');
            prompter.once('close', onClose);
            prompter.question(query, (answer) => {
                prompter.off('close', onClose);
                resolve(answer);
            });
        });

        while (true) {
            const answer = (await question('Press ENTER to capture, or type "q" then ENTER to quit: ')).trim();

            if (answer.toLowerCase() === 'q') {
                consoleLog`Quit command received.`;
                break; // Exit the loop to proceed to saving
            }

            // Anything other than 'q' (including a bare ENTER) is a capture command.
            if (answer !== '') {
                consoleLog`Input "${answer}" received, treating as CAPTURE command.`;
            } else {
                consoleLog`ENTER key received, treating as CAPTURE command.`;
            }

            consoleLog`Capturing current page...`;
            try {
                await snapShotter.capturePage();
            } catch (err) {
                // A failed capture should not end the session; report and keep prompting.
                console.error("! Error during capture:", err.message);
            }
        }
    } catch (error) {
        console.error("An error occurred:", error);
        // Do not leave the browser running when the session is abandoned.
        try {
            await snapShotter.browser?.close();
        } catch (closeError) {
            console.error("Failed to close the browser:", closeError);
        }
        return null;
    } finally {
        // Always release stdin; on the success path this happens before saving.
        rl?.close();
    }
    return await snapShotter.endFlow(tm);
}
@@ -0,0 +1,74 @@
1
+ import JSZip from 'jszip';
2
+ import { getRegressionMode, providers, logVerbose, logInfo, styleString, utils, opsManager, logError, logWarn } from '@l10nmonster/core';
3
+
4
+ /**
5
+ * @typedef {object} LQABossProviderOptions
6
+ * @extends BaseTranslationProvider
7
+ * @property {Object} delegate - Required file store delegate implementing file operations
8
+ */
9
+
10
/**
 * Translation provider that routes a job through LQA Boss.
 *
 * The job is uploaded as a `.lqaboss` archive through the storage delegate;
 * the provider then tries to read back a `<jobGuid>.json` file and turns its
 * content into the final job response.
 */
export class LQABossProvider extends providers.BaseTranslationProvider {
    #storageDelegate;
    #opNames = {};

    /**
     * Creates the provider and registers its three ops with the ops manager.
     * @param {LQABossProviderOptions} options - Provider configuration; `delegate` is kept
     *   as the storage delegate and the remaining options go to the base class.
     */
    constructor({ delegate, ...options }) {
        super(options);
        this.#storageDelegate = delegate;
        // Op names are namespaced with the provider id.
        for (const opKey of [ 'startReviewOp', 'continueReviewOp', 'completeReviewOp' ]) {
            this.#opNames[opKey] = `${this.id}.${opKey}`;
        }
        const names = this.#opNames;
        opsManager.registerOp(this.startReviewOp.bind(this), { opName: names.startReviewOp, idempotent: false });
        opsManager.registerOp(this.continueReviewOp.bind(this), { opName: names.continueReviewOp, idempotent: true });
        opsManager.registerOp(this.completeReviewOp.bind(this), { opName: names.completeReviewOp, idempotent: true });
    }

    /**
     * Builds the task for a job: the root op is the "complete" op and the
     * "start" op is enqueued on it with the job as argument.
     * @param {object} job - The job to review.
     * @returns {object} The created task.
     */
    createTask(job) {
        logVerbose`LQABossProvider creating task for job ${job.jobGuid}`;
        const reviewTask = opsManager.createTask(this.id, this.#opNames.completeReviewOp);
        reviewTask.rootOp.enqueue(this.#opNames.startReviewOp, { job });
        return reviewTask;
    }

    /**
     * Zips the job as `job.json`, saves it as `<jobGuid>.lqaboss` through the
     * storage delegate, and enqueues the "continue" op for the same job.
     * @param {object} op - The operation context; `op.args.job` is the job.
     */
    async startReviewOp(op) {
        const { job } = op.args;
        const archive = new JSZip();
        archive.file('job.json', JSON.stringify(job, null, 2));
        const buffer = await archive.generateAsync({
            type: 'nodebuffer',
            compression: 'DEFLATE',
            compressionOptions: { level: 6 },
        });
        const filename = `${job.jobGuid}.lqaboss`;
        await this.#storageDelegate.saveFile(filename, buffer);
        logVerbose`LQABoss file ${filename} with ${job.tus.length} guids and ${buffer.length} bytes saved`;
        op.parentTask.rootOp.enqueue(this.#opNames.continueReviewOp, { jobGuid: job.jobGuid });
    }

    /**
     * Fetches the completed review job. This will error out until the review is complete.
     * @param {object} op - The operation context; `op.args.jobGuid` identifies the job.
     * @returns {Promise<*>} The parsed content of `<jobGuid>.json`.
     */
    async continueReviewOp(op) {
        const filename = `${op.args.jobGuid}.json`;
        logVerbose`Trying to fetch completed LQABoss file ${filename}`;
        const rawJob = await this.#storageDelegate.getFile(filename);
        return JSON.parse(rawJob);
    }

    /**
     * Produces the final job response from the reviewed job: marks it done and
     * stamps every tu with a timestamp and this provider's quality.
     * @param {object} op - The operation context; the reviewed job is read from `op.inputs[1]`.
     * @returns {Promise<object>} The job response.
     */
    async completeReviewOp(op) {
        // the second op should be continueReviewOp
        const { tus: reviewedTus, ...jobResponse } = op.inputs[1];
        // Regression mode uses a fixed timestamp instead of the current time.
        const ts = getRegressionMode() ? 1 : new Date().getTime();
        jobResponse.status = 'done';
        jobResponse.tus = reviewedTus.map((tu) => ({ ...tu, ts, q: this.quality }));
        return jobResponse;
    }

    /**
     * Extends the base provider info with a line describing the storage delegate.
     * @returns {Promise<object>} The provider info.
     */
    async info() {
        const providerInfo = await super.info();
        providerInfo.description.push(styleString`Storage delegate: ${this.#storageDelegate.toString()}`);
        return providerInfo;
    }
}
package/package.json ADDED
@@ -0,0 +1,18 @@
1
+ {
2
+ "name": "@l10nmonster/helpers-lqaboss",
3
+ "version": "3.0.0-alpha.1",
4
+ "description": "LQA Boss helper for L10n Monster",
5
+ "main": "index.js",
6
+ "type": "module",
7
+ "scripts": {
8
+ "start": "node index.js",
9
+ "test": "node --test test/*.test.js"
10
+ },
11
+ "dependencies": {
12
+ "jszip": "^3.10.1",
13
+ "puppeteer": "^24"
14
+ },
15
+ "peerDependencies": {
16
+ "@l10nmonster/core": "file:../core"
17
+ }
18
+ }