@code.store/arcxp-sdk-ts 5.2.0 → 5.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/author/index.d.ts +1 -1
- package/dist/api/content/index.d.ts +1 -1
- package/dist/api/content-ops/index.d.ts +1 -1
- package/dist/api/custom/index.d.ts +1 -1
- package/dist/api/developer-retail/index.d.ts +1 -1
- package/dist/api/draft/index.d.ts +1 -1
- package/dist/api/global-settings/index.d.ts +1 -1
- package/dist/api/identity/index.d.ts +1 -1
- package/dist/api/ifx/index.d.ts +1 -1
- package/dist/api/index.d.ts +2 -2
- package/dist/api/migration-center/index.d.ts +1 -1
- package/dist/api/photo-center/index.d.ts +1 -1
- package/dist/api/redirect/index.d.ts +2 -2
- package/dist/api/sales/index.d.ts +1 -1
- package/dist/api/signing-service/index.d.ts +1 -1
- package/dist/api/site/index.d.ts +1 -1
- package/dist/api/tags/index.d.ts +1 -1
- package/dist/api/websked/index.d.ts +1 -1
- package/dist/content-elements/html/html.utils.d.ts +0 -3
- package/dist/content-elements/index.d.ts +1 -0
- package/dist/content-elements/xml/index.d.ts +3 -0
- package/dist/content-elements/xml/xml.constants.d.ts +1 -0
- package/dist/content-elements/xml/xml.processor.d.ts +45 -0
- package/dist/content-elements/xml/xml.utils.d.ts +5 -0
- package/dist/index.cjs +1146 -921
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +4 -4
- package/dist/index.js +1143 -919
- package/dist/index.js.map +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/utils/arc/content.d.ts +3 -1
- package/package.json +12 -5
package/dist/index.cjs
CHANGED
|
@@ -9,11 +9,12 @@ var fs = require('node:fs');
|
|
|
9
9
|
var path = require('node:path');
|
|
10
10
|
var FormData = require('form-data');
|
|
11
11
|
var ws = require('ws');
|
|
12
|
+
var nodeHtmlParser = require('node-html-parser');
|
|
13
|
+
var htmlEntities = require('html-entities');
|
|
14
|
+
var xmldoc = require('xmldoc');
|
|
12
15
|
var encode = require('base32-encode');
|
|
13
16
|
var uuid = require('uuid');
|
|
14
17
|
var assert = require('node:assert');
|
|
15
|
-
var nodeHtmlParser = require('node-html-parser');
|
|
16
|
-
var htmlEntities = require('html-entities');
|
|
17
18
|
|
|
18
19
|
function _interopNamespaceDefault(e) {
|
|
19
20
|
var n = Object.create(null);
|
|
@@ -34,6 +35,7 @@ function _interopNamespaceDefault(e) {
|
|
|
34
35
|
|
|
35
36
|
var rateLimit__namespace = /*#__PURE__*/_interopNamespaceDefault(rateLimit);
|
|
36
37
|
var ws__namespace = /*#__PURE__*/_interopNamespaceDefault(ws);
|
|
38
|
+
var xmldoc__namespace = /*#__PURE__*/_interopNamespaceDefault(xmldoc);
|
|
37
39
|
|
|
38
40
|
const safeJSONStringify = (data) => {
|
|
39
41
|
try {
|
|
@@ -137,28 +139,6 @@ class ArcAuthor extends ArcAbstractAPI {
|
|
|
137
139
|
}
|
|
138
140
|
}
|
|
139
141
|
|
|
140
|
-
class ArcContentOps extends ArcAbstractAPI {
|
|
141
|
-
constructor(options) {
|
|
142
|
-
super({ ...options, apiPath: 'contentops/v1' });
|
|
143
|
-
}
|
|
144
|
-
async schedulePublish(payload) {
|
|
145
|
-
const { data } = await this.client.put('/publish', payload);
|
|
146
|
-
return data;
|
|
147
|
-
}
|
|
148
|
-
async scheduleUnpublish(payload) {
|
|
149
|
-
const { data } = await this.client.put('/unpublish', payload);
|
|
150
|
-
return data;
|
|
151
|
-
}
|
|
152
|
-
async unscheduleUnpublish(payload) {
|
|
153
|
-
const { data } = await this.client.put('/unschedule_unpublish', payload);
|
|
154
|
-
return data;
|
|
155
|
-
}
|
|
156
|
-
async unschedulePublish(payload) {
|
|
157
|
-
const { data } = await this.client.put('/unschedule_publish', payload);
|
|
158
|
-
return data;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
|
|
162
142
|
class ArcContent extends ArcAbstractAPI {
|
|
163
143
|
constructor(options) {
|
|
164
144
|
super({ ...options, apiPath: 'content/v4' });
|
|
@@ -183,6 +163,28 @@ class ArcContent extends ArcAbstractAPI {
|
|
|
183
163
|
}
|
|
184
164
|
}
|
|
185
165
|
|
|
166
|
+
class ArcContentOps extends ArcAbstractAPI {
|
|
167
|
+
constructor(options) {
|
|
168
|
+
super({ ...options, apiPath: 'contentops/v1' });
|
|
169
|
+
}
|
|
170
|
+
async schedulePublish(payload) {
|
|
171
|
+
const { data } = await this.client.put('/publish', payload);
|
|
172
|
+
return data;
|
|
173
|
+
}
|
|
174
|
+
async scheduleUnpublish(payload) {
|
|
175
|
+
const { data } = await this.client.put('/unpublish', payload);
|
|
176
|
+
return data;
|
|
177
|
+
}
|
|
178
|
+
async unscheduleUnpublish(payload) {
|
|
179
|
+
const { data } = await this.client.put('/unschedule_unpublish', payload);
|
|
180
|
+
return data;
|
|
181
|
+
}
|
|
182
|
+
async unschedulePublish(payload) {
|
|
183
|
+
const { data } = await this.client.put('/unschedule_publish', payload);
|
|
184
|
+
return data;
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
186
188
|
class Custom extends ArcAbstractAPI {
|
|
187
189
|
constructor(options) {
|
|
188
190
|
super({ ...options, apiPath: '' });
|
|
@@ -196,6 +198,128 @@ class Custom extends ArcAbstractAPI {
|
|
|
196
198
|
}
|
|
197
199
|
}
|
|
198
200
|
|
|
201
|
+
class ArcDeveloperRetail extends ArcAbstractAPI {
|
|
202
|
+
constructor(options) {
|
|
203
|
+
super({ ...options, apiPath: 'retail/api/v1' });
|
|
204
|
+
}
|
|
205
|
+
// ============================================
|
|
206
|
+
// Product Methods
|
|
207
|
+
// ============================================
|
|
208
|
+
async getProductById(id, params) {
|
|
209
|
+
const { data } = await this.client.get(`/product/${id}`, { params });
|
|
210
|
+
return data;
|
|
211
|
+
}
|
|
212
|
+
async getProductBySku(sku, params) {
|
|
213
|
+
const { data } = await this.client.get(`/product/sku/${sku}`, { params });
|
|
214
|
+
return data;
|
|
215
|
+
}
|
|
216
|
+
async getProductByPriceCode(priceCode, params) {
|
|
217
|
+
const { data } = await this.client.get(`/product/pricecode/${priceCode}`, { params });
|
|
218
|
+
return data;
|
|
219
|
+
}
|
|
220
|
+
async getAllProducts(params) {
|
|
221
|
+
const { data } = await this.client.get('/product', { params });
|
|
222
|
+
return data;
|
|
223
|
+
}
|
|
224
|
+
// ============================================
|
|
225
|
+
// Pricing Strategy Methods
|
|
226
|
+
// ============================================
|
|
227
|
+
async getPricingStrategyById(id, params) {
|
|
228
|
+
const { data } = await this.client.get(`/pricing/strategy/${id}`, { params });
|
|
229
|
+
return data;
|
|
230
|
+
}
|
|
231
|
+
async getAllPricingStrategies(params) {
|
|
232
|
+
const { data } = await this.client.get('/pricing/strategy', { params });
|
|
233
|
+
return data;
|
|
234
|
+
}
|
|
235
|
+
// ============================================
|
|
236
|
+
// Pricing Rate Methods
|
|
237
|
+
// ============================================
|
|
238
|
+
async getPricingRateById(id, params) {
|
|
239
|
+
const { data } = await this.client.get(`/pricing/rate/${id}`, { params });
|
|
240
|
+
return data;
|
|
241
|
+
}
|
|
242
|
+
async getAllPricingRates(params) {
|
|
243
|
+
const { data } = await this.client.get('/pricing/rate', { params });
|
|
244
|
+
return data;
|
|
245
|
+
}
|
|
246
|
+
// ============================================
|
|
247
|
+
// Pricing Cycle Methods
|
|
248
|
+
// ============================================
|
|
249
|
+
async getPricingCycle(priceCode, cycleIndex, startDate, params) {
|
|
250
|
+
const { data } = await this.client.get(`/pricing/cycle/${priceCode}/${cycleIndex}/${startDate}`, {
|
|
251
|
+
params,
|
|
252
|
+
});
|
|
253
|
+
return data;
|
|
254
|
+
}
|
|
255
|
+
// ============================================
|
|
256
|
+
// Campaign Methods
|
|
257
|
+
// ============================================
|
|
258
|
+
async getCampaignById(id, params) {
|
|
259
|
+
const { data } = await this.client.get(`/campaign/${id}`, { params });
|
|
260
|
+
return data;
|
|
261
|
+
}
|
|
262
|
+
async getCampaignByName(campaignName, params) {
|
|
263
|
+
const { data } = await this.client.get(`/campaign/${campaignName}/get`, { params });
|
|
264
|
+
return data;
|
|
265
|
+
}
|
|
266
|
+
async getAllCampaigns(params) {
|
|
267
|
+
const { data } = await this.client.get('/campaign', { params });
|
|
268
|
+
return data;
|
|
269
|
+
}
|
|
270
|
+
// ============================================
|
|
271
|
+
// Campaign Category Methods
|
|
272
|
+
// ============================================
|
|
273
|
+
async getCampaignCategoryById(id, params) {
|
|
274
|
+
const { data } = await this.client.get(`/campaign/category/${id}`, { params });
|
|
275
|
+
return data;
|
|
276
|
+
}
|
|
277
|
+
async getAllCampaignCategories(params) {
|
|
278
|
+
const { data } = await this.client.get('/campaign/category', { params });
|
|
279
|
+
return data;
|
|
280
|
+
}
|
|
281
|
+
// ============================================
|
|
282
|
+
// Offer Methods
|
|
283
|
+
// ============================================
|
|
284
|
+
async getOfferById(id, params) {
|
|
285
|
+
const { data } = await this.client.get(`/offer/${id}`, { params });
|
|
286
|
+
return data;
|
|
287
|
+
}
|
|
288
|
+
async getAllOffers(params) {
|
|
289
|
+
const { data } = await this.client.get('/offer', { params });
|
|
290
|
+
return data;
|
|
291
|
+
}
|
|
292
|
+
// ============================================
|
|
293
|
+
// Offer Attribute Methods
|
|
294
|
+
// ============================================
|
|
295
|
+
async getOfferAttributeById(id, params) {
|
|
296
|
+
const { data } = await this.client.get(`/offer/attribute/${id}`, { params });
|
|
297
|
+
return data;
|
|
298
|
+
}
|
|
299
|
+
async getAllOfferAttributes(params) {
|
|
300
|
+
const { data } = await this.client.get('/offer/attribute', { params });
|
|
301
|
+
return data;
|
|
302
|
+
}
|
|
303
|
+
// ============================================
|
|
304
|
+
// Product Attribute Methods
|
|
305
|
+
// ============================================
|
|
306
|
+
async getProductAttributeById(id, params) {
|
|
307
|
+
const { data } = await this.client.get(`/product/attribute/${id}`, { params });
|
|
308
|
+
return data;
|
|
309
|
+
}
|
|
310
|
+
async getAllProductAttributes(params) {
|
|
311
|
+
const { data } = await this.client.get('/product/attribute', { params });
|
|
312
|
+
return data;
|
|
313
|
+
}
|
|
314
|
+
// ============================================
|
|
315
|
+
// Condition Category Methods
|
|
316
|
+
// ============================================
|
|
317
|
+
async getAllConditionCategories(params) {
|
|
318
|
+
const { data } = await this.client.get('/condition/categories', { params });
|
|
319
|
+
return data;
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
199
323
|
class ArcDraft extends ArcAbstractAPI {
|
|
200
324
|
constructor(options) {
|
|
201
325
|
super({ ...options, apiPath: 'draft/v1' });
|
|
@@ -592,190 +716,68 @@ class ArcRetailEvents {
|
|
|
592
716
|
}
|
|
593
717
|
}
|
|
594
718
|
|
|
595
|
-
class
|
|
719
|
+
class ArcSales extends ArcAbstractAPI {
|
|
596
720
|
constructor(options) {
|
|
597
|
-
super({ ...options, apiPath: '
|
|
721
|
+
super({ ...options, apiPath: 'sales/api/v1' });
|
|
598
722
|
}
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
const { data } = await this.client.
|
|
723
|
+
async migrate(params, payload) {
|
|
724
|
+
const FormData = await platform.form_data();
|
|
725
|
+
const form = new FormData();
|
|
726
|
+
form.append('file', JSON.stringify(payload), { filename: 'subs.json', contentType: 'application/json' });
|
|
727
|
+
const { data } = await this.client.post('/migrate', form, {
|
|
728
|
+
params,
|
|
729
|
+
headers: {
|
|
730
|
+
...form.getHeaders(),
|
|
731
|
+
},
|
|
732
|
+
});
|
|
604
733
|
return data;
|
|
605
734
|
}
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
735
|
+
}
|
|
736
|
+
class ArcSalesV2 extends ArcAbstractAPI {
|
|
737
|
+
constructor(options) {
|
|
738
|
+
super({ ...options, apiPath: 'sales/api/v2' });
|
|
609
739
|
}
|
|
610
|
-
async
|
|
611
|
-
const { data } = await this.client.get(
|
|
740
|
+
async getEnterpriseGroups(params) {
|
|
741
|
+
const { data } = await this.client.get('/subscriptions/enterprise', {
|
|
742
|
+
params: {
|
|
743
|
+
'arc-site': params.site,
|
|
744
|
+
},
|
|
745
|
+
});
|
|
612
746
|
return data;
|
|
613
747
|
}
|
|
614
|
-
async
|
|
615
|
-
const { data } = await this.client.
|
|
748
|
+
async createEnterpriseGroup(params, payload) {
|
|
749
|
+
const { data } = await this.client.post('/subscriptions/enterprise', payload, {
|
|
750
|
+
params: {
|
|
751
|
+
'arc-site': params.site,
|
|
752
|
+
},
|
|
753
|
+
});
|
|
616
754
|
return data;
|
|
617
755
|
}
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
const { data } = await this.client.get(`/pricing/strategy/${id}`, { params });
|
|
756
|
+
async createNonce(website, enterpriseGroupId) {
|
|
757
|
+
const { data } = await this.client.get(`/subscriptions/enterprise/${enterpriseGroupId}`, {
|
|
758
|
+
params: { 'arc-site': website },
|
|
759
|
+
});
|
|
623
760
|
return data;
|
|
624
761
|
}
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
class ArcSigningService extends ArcAbstractAPI {
|
|
765
|
+
constructor(options) {
|
|
766
|
+
super({ ...options, apiPath: 'signing-service' });
|
|
628
767
|
}
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
// ============================================
|
|
632
|
-
async getPricingRateById(id, params) {
|
|
633
|
-
const { data } = await this.client.get(`/pricing/rate/${id}`, { params });
|
|
768
|
+
async sign(service, serviceVersion, imageId) {
|
|
769
|
+
const { data } = await this.client.get(`/v2/sign/${service}/${serviceVersion}?value=${encodeURI(imageId)}`);
|
|
634
770
|
return data;
|
|
635
771
|
}
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
class ArcSite extends ArcAbstractAPI {
|
|
775
|
+
constructor(options) {
|
|
776
|
+
super({ ...options, apiPath: 'site/v3' });
|
|
639
777
|
}
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
async getPricingCycle(priceCode, cycleIndex, startDate, params) {
|
|
644
|
-
const { data } = await this.client.get(`/pricing/cycle/${priceCode}/${cycleIndex}/${startDate}`, {
|
|
645
|
-
params,
|
|
646
|
-
});
|
|
647
|
-
return data;
|
|
648
|
-
}
|
|
649
|
-
// ============================================
|
|
650
|
-
// Campaign Methods
|
|
651
|
-
// ============================================
|
|
652
|
-
async getCampaignById(id, params) {
|
|
653
|
-
const { data } = await this.client.get(`/campaign/${id}`, { params });
|
|
654
|
-
return data;
|
|
655
|
-
}
|
|
656
|
-
async getCampaignByName(campaignName, params) {
|
|
657
|
-
const { data } = await this.client.get(`/campaign/${campaignName}/get`, { params });
|
|
658
|
-
return data;
|
|
659
|
-
}
|
|
660
|
-
async getAllCampaigns(params) {
|
|
661
|
-
const { data } = await this.client.get('/campaign', { params });
|
|
662
|
-
return data;
|
|
663
|
-
}
|
|
664
|
-
// ============================================
|
|
665
|
-
// Campaign Category Methods
|
|
666
|
-
// ============================================
|
|
667
|
-
async getCampaignCategoryById(id, params) {
|
|
668
|
-
const { data } = await this.client.get(`/campaign/category/${id}`, { params });
|
|
669
|
-
return data;
|
|
670
|
-
}
|
|
671
|
-
async getAllCampaignCategories(params) {
|
|
672
|
-
const { data } = await this.client.get('/campaign/category', { params });
|
|
673
|
-
return data;
|
|
674
|
-
}
|
|
675
|
-
// ============================================
|
|
676
|
-
// Offer Methods
|
|
677
|
-
// ============================================
|
|
678
|
-
async getOfferById(id, params) {
|
|
679
|
-
const { data } = await this.client.get(`/offer/${id}`, { params });
|
|
680
|
-
return data;
|
|
681
|
-
}
|
|
682
|
-
async getAllOffers(params) {
|
|
683
|
-
const { data } = await this.client.get('/offer', { params });
|
|
684
|
-
return data;
|
|
685
|
-
}
|
|
686
|
-
// ============================================
|
|
687
|
-
// Offer Attribute Methods
|
|
688
|
-
// ============================================
|
|
689
|
-
async getOfferAttributeById(id, params) {
|
|
690
|
-
const { data } = await this.client.get(`/offer/attribute/${id}`, { params });
|
|
691
|
-
return data;
|
|
692
|
-
}
|
|
693
|
-
async getAllOfferAttributes(params) {
|
|
694
|
-
const { data } = await this.client.get('/offer/attribute', { params });
|
|
695
|
-
return data;
|
|
696
|
-
}
|
|
697
|
-
// ============================================
|
|
698
|
-
// Product Attribute Methods
|
|
699
|
-
// ============================================
|
|
700
|
-
async getProductAttributeById(id, params) {
|
|
701
|
-
const { data } = await this.client.get(`/product/attribute/${id}`, { params });
|
|
702
|
-
return data;
|
|
703
|
-
}
|
|
704
|
-
async getAllProductAttributes(params) {
|
|
705
|
-
const { data } = await this.client.get('/product/attribute', { params });
|
|
706
|
-
return data;
|
|
707
|
-
}
|
|
708
|
-
// ============================================
|
|
709
|
-
// Condition Category Methods
|
|
710
|
-
// ============================================
|
|
711
|
-
async getAllConditionCategories(params) {
|
|
712
|
-
const { data } = await this.client.get('/condition/categories', { params });
|
|
713
|
-
return data;
|
|
714
|
-
}
|
|
715
|
-
}
|
|
716
|
-
|
|
717
|
-
class ArcSales extends ArcAbstractAPI {
|
|
718
|
-
constructor(options) {
|
|
719
|
-
super({ ...options, apiPath: 'sales/api/v1' });
|
|
720
|
-
}
|
|
721
|
-
async migrate(params, payload) {
|
|
722
|
-
const FormData = await platform.form_data();
|
|
723
|
-
const form = new FormData();
|
|
724
|
-
form.append('file', JSON.stringify(payload), { filename: 'subs.json', contentType: 'application/json' });
|
|
725
|
-
const { data } = await this.client.post('/migrate', form, {
|
|
726
|
-
params,
|
|
727
|
-
headers: {
|
|
728
|
-
...form.getHeaders(),
|
|
729
|
-
},
|
|
730
|
-
});
|
|
731
|
-
return data;
|
|
732
|
-
}
|
|
733
|
-
}
|
|
734
|
-
class ArcSalesV2 extends ArcAbstractAPI {
|
|
735
|
-
constructor(options) {
|
|
736
|
-
super({ ...options, apiPath: 'sales/api/v2' });
|
|
737
|
-
}
|
|
738
|
-
async getEnterpriseGroups(params) {
|
|
739
|
-
const { data } = await this.client.get('/subscriptions/enterprise', {
|
|
740
|
-
params: {
|
|
741
|
-
'arc-site': params.site,
|
|
742
|
-
},
|
|
743
|
-
});
|
|
744
|
-
return data;
|
|
745
|
-
}
|
|
746
|
-
async createEnterpriseGroup(params, payload) {
|
|
747
|
-
const { data } = await this.client.post('/subscriptions/enterprise', payload, {
|
|
748
|
-
params: {
|
|
749
|
-
'arc-site': params.site,
|
|
750
|
-
},
|
|
751
|
-
});
|
|
752
|
-
return data;
|
|
753
|
-
}
|
|
754
|
-
async createNonce(website, enterpriseGroupId) {
|
|
755
|
-
const { data } = await this.client.get(`/subscriptions/enterprise/${enterpriseGroupId}`, {
|
|
756
|
-
params: { 'arc-site': website },
|
|
757
|
-
});
|
|
758
|
-
return data;
|
|
759
|
-
}
|
|
760
|
-
}
|
|
761
|
-
|
|
762
|
-
class ArcSigningService extends ArcAbstractAPI {
|
|
763
|
-
constructor(options) {
|
|
764
|
-
super({ ...options, apiPath: 'signing-service' });
|
|
765
|
-
}
|
|
766
|
-
async sign(service, serviceVersion, imageId) {
|
|
767
|
-
const { data } = await this.client.get(`/v2/sign/${service}/${serviceVersion}?value=${encodeURI(imageId)}`);
|
|
768
|
-
return data;
|
|
769
|
-
}
|
|
770
|
-
}
|
|
771
|
-
|
|
772
|
-
class ArcSite extends ArcAbstractAPI {
|
|
773
|
-
constructor(options) {
|
|
774
|
-
super({ ...options, apiPath: 'site/v3' });
|
|
775
|
-
}
|
|
776
|
-
async getSections(params) {
|
|
777
|
-
const { data } = await this.client.get(`/website/${params.website}/section`, {
|
|
778
|
-
params: { _website: params.website, ...params },
|
|
778
|
+
async getSections(params) {
|
|
779
|
+
const { data } = await this.client.get(`/website/${params.website}/section`, {
|
|
780
|
+
params: { _website: params.website, ...params },
|
|
779
781
|
});
|
|
780
782
|
return data;
|
|
781
783
|
}
|
|
@@ -913,85 +915,7 @@ const ArcAPI = (options) => {
|
|
|
913
915
|
return API;
|
|
914
916
|
};
|
|
915
917
|
|
|
916
|
-
|
|
917
|
-
/**
|
|
918
|
-
* This file was automatically generated by json-schema-to-typescript.
|
|
919
|
-
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
|
|
920
|
-
* and run json-schema-to-typescript to regenerate this file.
|
|
921
|
-
*/
|
|
922
|
-
|
|
923
|
-
var ansTypes = /*#__PURE__*/Object.freeze({
|
|
924
|
-
__proto__: null
|
|
925
|
-
});
|
|
926
|
-
|
|
927
|
-
var utils = /*#__PURE__*/Object.freeze({
|
|
928
|
-
__proto__: null
|
|
929
|
-
});
|
|
930
|
-
|
|
931
|
-
var ANSType;
|
|
932
|
-
(function (ANSType) {
|
|
933
|
-
ANSType["Story"] = "story";
|
|
934
|
-
ANSType["Video"] = "video";
|
|
935
|
-
ANSType["Tag"] = "tag";
|
|
936
|
-
ANSType["Author"] = "author";
|
|
937
|
-
ANSType["Gallery"] = "gallery";
|
|
938
|
-
ANSType["Image"] = "image";
|
|
939
|
-
ANSType["Redirect"] = "redirect";
|
|
940
|
-
})(ANSType || (ANSType = {}));
|
|
941
|
-
var MigrationStatus;
|
|
942
|
-
(function (MigrationStatus) {
|
|
943
|
-
MigrationStatus["Success"] = "Success";
|
|
944
|
-
MigrationStatus["Queued"] = "Queued";
|
|
945
|
-
MigrationStatus["Circulated"] = "Circulated";
|
|
946
|
-
MigrationStatus["Published"] = "Published";
|
|
947
|
-
MigrationStatus["Scheduled"] = "Scheduled";
|
|
948
|
-
MigrationStatus["FailVideo"] = "FailVideo";
|
|
949
|
-
MigrationStatus["FailImage"] = "FailImage";
|
|
950
|
-
MigrationStatus["FailPhoto"] = "FailPhoto";
|
|
951
|
-
MigrationStatus["FailStory"] = "FailStory";
|
|
952
|
-
MigrationStatus["FailGallery"] = "FailGallery";
|
|
953
|
-
MigrationStatus["FailAuthor"] = "FailAuthor";
|
|
954
|
-
MigrationStatus["FailTag"] = "FailTag";
|
|
955
|
-
MigrationStatus["ValidationFailed"] = "ValidationFailed";
|
|
956
|
-
})(MigrationStatus || (MigrationStatus = {}));
|
|
957
|
-
var SummarySortBy;
|
|
958
|
-
(function (SummarySortBy) {
|
|
959
|
-
SummarySortBy["CreateDate"] = "createDate";
|
|
960
|
-
SummarySortBy["UpdateDate"] = "updateDate";
|
|
961
|
-
SummarySortBy["Id"] = "id";
|
|
962
|
-
})(SummarySortBy || (SummarySortBy = {}));
|
|
963
|
-
var SummarySortOrder;
|
|
964
|
-
(function (SummarySortOrder) {
|
|
965
|
-
SummarySortOrder["ASC"] = "ASC";
|
|
966
|
-
SummarySortOrder["DESC"] = "DESC";
|
|
967
|
-
})(SummarySortOrder || (SummarySortOrder = {}));
|
|
968
|
-
|
|
969
|
-
var index$3 = /*#__PURE__*/Object.freeze({
|
|
970
|
-
__proto__: null,
|
|
971
|
-
ANS: ansTypes,
|
|
972
|
-
get ANSType () { return ANSType; },
|
|
973
|
-
get MigrationStatus () { return MigrationStatus; },
|
|
974
|
-
get SummarySortBy () { return SummarySortBy; },
|
|
975
|
-
get SummarySortOrder () { return SummarySortOrder; },
|
|
976
|
-
TypeUtils: utils
|
|
977
|
-
});
|
|
978
|
-
|
|
979
|
-
const reference = (ref) => {
|
|
980
|
-
return {
|
|
981
|
-
_id: ref.id,
|
|
982
|
-
type: 'reference',
|
|
983
|
-
referent: {
|
|
984
|
-
...ref,
|
|
985
|
-
},
|
|
986
|
-
};
|
|
987
|
-
};
|
|
988
|
-
|
|
989
|
-
var ANS = /*#__PURE__*/Object.freeze({
|
|
990
|
-
__proto__: null,
|
|
991
|
-
reference: reference
|
|
992
|
-
});
|
|
993
|
-
|
|
994
|
-
const ContentElement = {
|
|
918
|
+
const ContentElement$1 = {
|
|
995
919
|
divider: () => {
|
|
996
920
|
return {
|
|
997
921
|
type: 'divider',
|
|
@@ -1223,18 +1147,60 @@ const ContentElement = {
|
|
|
1223
1147
|
},
|
|
1224
1148
|
};
|
|
1225
1149
|
|
|
1150
|
+
const BLOCK_ELEMENT_TAGS$1 = [
|
|
1151
|
+
'ADDRESS',
|
|
1152
|
+
'ARTICLE',
|
|
1153
|
+
'ASIDE',
|
|
1154
|
+
'BLOCKQUOTE',
|
|
1155
|
+
'DETAILS',
|
|
1156
|
+
'DIV',
|
|
1157
|
+
'DL',
|
|
1158
|
+
'FIELDSET',
|
|
1159
|
+
'FIGCAPTION',
|
|
1160
|
+
'FIGURE',
|
|
1161
|
+
'FOOTER',
|
|
1162
|
+
'FORM',
|
|
1163
|
+
'H1',
|
|
1164
|
+
'H2',
|
|
1165
|
+
'H3',
|
|
1166
|
+
'H4',
|
|
1167
|
+
'H5',
|
|
1168
|
+
'H6',
|
|
1169
|
+
'HEADER',
|
|
1170
|
+
'HR',
|
|
1171
|
+
'LINE',
|
|
1172
|
+
'MAIN',
|
|
1173
|
+
'MENU',
|
|
1174
|
+
'NAV',
|
|
1175
|
+
'OL',
|
|
1176
|
+
'P',
|
|
1177
|
+
'PARAGRAPH',
|
|
1178
|
+
'PRE',
|
|
1179
|
+
'SECTION',
|
|
1180
|
+
'TABLE',
|
|
1181
|
+
'UL',
|
|
1182
|
+
'LI',
|
|
1183
|
+
'BODY',
|
|
1184
|
+
'HTML',
|
|
1185
|
+
];
|
|
1186
|
+
|
|
1187
|
+
var html_constants = /*#__PURE__*/Object.freeze({
|
|
1188
|
+
__proto__: null,
|
|
1189
|
+
BLOCK_ELEMENT_TAGS: BLOCK_ELEMENT_TAGS$1
|
|
1190
|
+
});
|
|
1191
|
+
|
|
1226
1192
|
const socialRegExps = {
|
|
1227
|
-
instagram: /(?:https?:\/\/)?(?:www.)?instagram.com\/?([a-zA-Z0-9
|
|
1228
|
-
twitter: /https:\/\/(?:www\.)?twitter\.com\/[
|
|
1229
|
-
tiktok: /https:\/\/(?:m|www|vm)?\.?tiktok\.com\/((?:.*\b(?:(?:usr|v|embed|user|video)\/|\?shareId
|
|
1230
|
-
facebookPost: /https:\/\/www\.facebook\.com\/(photo(\.php|s)|permalink\.php|media|questions|notes|[
|
|
1231
|
-
facebookVideo: /https:\/\/www\.facebook\.com\/([
|
|
1193
|
+
instagram: /(?:https?:\/\/)?(?:www.)?instagram.com\/?([a-zA-Z0-9._-]+)?\/([p]+)?([reel]+)?([tv]+)?([stories]+)?\/([a-zA-Z0-9\-_.]+)\/?([0-9]+)?/,
|
|
1194
|
+
twitter: /https:\/\/(?:www\.)?twitter\.com\/[^/]+\/status(?:es)?\/(\d+)/,
|
|
1195
|
+
tiktok: /https:\/\/(?:m|www|vm)?\.?tiktok\.com\/((?:.*\b(?:(?:usr|v|embed|user|video)\/|\?shareId=|&item_id=)(\d+))|\w+)/,
|
|
1196
|
+
facebookPost: /https:\/\/www\.facebook\.com\/(photo(\.php|s)|permalink\.php|media|questions|notes|[^/]+\/(activity|posts))[/?].*$/,
|
|
1197
|
+
facebookVideo: /https:\/\/www\.facebook\.com\/([^/?].+\/)?video(s|\.php)[/?].*/,
|
|
1232
1198
|
};
|
|
1233
1199
|
function match(url, regex) {
|
|
1234
1200
|
return url.match(regex)?.[0];
|
|
1235
1201
|
}
|
|
1236
1202
|
function youtubeURLParser(url = '') {
|
|
1237
|
-
const regExp = /(?:youtube(?:-nocookie)?\.com\/(?:[
|
|
1203
|
+
const regExp = /(?:youtube(?:-nocookie)?\.com\/(?:[^/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]vi?=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/;
|
|
1238
1204
|
const id = url?.match(regExp)?.[1];
|
|
1239
1205
|
if (id) {
|
|
1240
1206
|
return `https://youtu.be/${id}`;
|
|
@@ -1264,273 +1230,698 @@ function createSocial(url = '') {
|
|
|
1264
1230
|
const embeds = [];
|
|
1265
1231
|
const instagram = instagramURLParser(url);
|
|
1266
1232
|
if (instagram) {
|
|
1267
|
-
embeds.push(ContentElement.instagram(instagram));
|
|
1233
|
+
embeds.push(ContentElement$1.instagram(instagram));
|
|
1268
1234
|
}
|
|
1269
1235
|
const twitter = twitterURLParser(url);
|
|
1270
1236
|
if (twitter) {
|
|
1271
|
-
embeds.push(ContentElement.twitter(twitter));
|
|
1237
|
+
embeds.push(ContentElement$1.twitter(twitter));
|
|
1272
1238
|
}
|
|
1273
1239
|
const tiktok = tiktokURLParser(url);
|
|
1274
1240
|
if (tiktok) {
|
|
1275
|
-
embeds.push(ContentElement.tiktok(tiktok));
|
|
1241
|
+
embeds.push(ContentElement$1.tiktok(tiktok));
|
|
1276
1242
|
}
|
|
1277
1243
|
const youtube = youtubeURLParser(url);
|
|
1278
1244
|
if (youtube) {
|
|
1279
|
-
embeds.push(ContentElement.youtube(youtube));
|
|
1245
|
+
embeds.push(ContentElement$1.youtube(youtube));
|
|
1280
1246
|
}
|
|
1281
1247
|
const facebookPost = facebookPostURLParser(url);
|
|
1282
1248
|
if (facebookPost) {
|
|
1283
|
-
embeds.push(ContentElement.facebook_post(facebookPost));
|
|
1249
|
+
embeds.push(ContentElement$1.facebook_post(facebookPost));
|
|
1284
1250
|
}
|
|
1285
1251
|
const facebookVideo = facebookVideoURLParser(url);
|
|
1286
1252
|
if (facebookVideo) {
|
|
1287
|
-
embeds.push(ContentElement.facebook_video(facebookVideo));
|
|
1253
|
+
embeds.push(ContentElement$1.facebook_video(facebookVideo));
|
|
1288
1254
|
}
|
|
1289
1255
|
return embeds;
|
|
1290
1256
|
}
|
|
1291
1257
|
const randomId = () => `${new Date().toISOString()}-${Math.random()}`;
|
|
1258
|
+
const isTextCE = (ce) => {
|
|
1259
|
+
return ce?.type === 'text';
|
|
1260
|
+
};
|
|
1261
|
+
const decodeHTMLEntities = (str) => htmlEntities.decode(str);
|
|
1292
1262
|
|
|
1293
1263
|
var ContentElements = /*#__PURE__*/Object.freeze({
|
|
1294
1264
|
__proto__: null,
|
|
1295
1265
|
createSocial: createSocial,
|
|
1266
|
+
decodeHTMLEntities: decodeHTMLEntities,
|
|
1296
1267
|
facebookPostURLParser: facebookPostURLParser,
|
|
1297
1268
|
facebookVideoURLParser: facebookVideoURLParser,
|
|
1298
1269
|
instagramURLParser: instagramURLParser,
|
|
1270
|
+
isTextCE: isTextCE,
|
|
1299
1271
|
randomId: randomId,
|
|
1300
1272
|
tiktokURLParser: tiktokURLParser,
|
|
1301
1273
|
twitterURLParser: twitterURLParser,
|
|
1302
1274
|
youtubeURLParser: youtubeURLParser
|
|
1303
1275
|
});
|
|
1304
1276
|
|
|
1305
|
-
const
|
|
1306
|
-
|
|
1307
|
-
const buffer = uuid.v5(identifier, namespace, Buffer.alloc(16));
|
|
1308
|
-
return encode(buffer, 'RFC4648', { padding: false });
|
|
1277
|
+
const isTextNode$1 = (node) => {
|
|
1278
|
+
return node instanceof nodeHtmlParser.TextNode;
|
|
1309
1279
|
};
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
*
|
|
1313
|
-
* @example
|
|
1314
|
-
* ```ts
|
|
1315
|
-
* const generator = new IdGenerator(['my-org']);
|
|
1316
|
-
* const arcId = generator.getArcId('123'); // Generates a unique for 'my-org' Arc ID
|
|
1317
|
-
* const sourceId = generator.getSourceId('123', ['my-site']); // Generates 'my-site-123'
|
|
1318
|
-
* ```
|
|
1319
|
-
*/
|
|
1320
|
-
class IdGenerator {
|
|
1321
|
-
constructor(namespaces) {
|
|
1322
|
-
if (!namespaces.length) {
|
|
1323
|
-
throw new Error('At least 1 namespace is required');
|
|
1324
|
-
}
|
|
1325
|
-
this.namespace = namespaces.join('-');
|
|
1326
|
-
}
|
|
1327
|
-
getArcId(id) {
|
|
1328
|
-
return generateArcId(id.toString(), this.namespace);
|
|
1329
|
-
}
|
|
1330
|
-
getSourceId(id, prefixes = []) {
|
|
1331
|
-
return [...prefixes, id].join('-');
|
|
1332
|
-
}
|
|
1333
|
-
}
|
|
1334
|
-
|
|
1335
|
-
var Id = /*#__PURE__*/Object.freeze({
|
|
1336
|
-
__proto__: null,
|
|
1337
|
-
IdGenerator: IdGenerator,
|
|
1338
|
-
generateArcId: generateArcId
|
|
1339
|
-
});
|
|
1340
|
-
|
|
1341
|
-
const buildTree = (items) => {
|
|
1342
|
-
const tree = [
|
|
1343
|
-
{
|
|
1344
|
-
id: '/',
|
|
1345
|
-
children: [],
|
|
1346
|
-
meta: new Proxy({}, {
|
|
1347
|
-
get: () => {
|
|
1348
|
-
throw new Error('Root node meta is not accessible');
|
|
1349
|
-
},
|
|
1350
|
-
}),
|
|
1351
|
-
parent: null,
|
|
1352
|
-
},
|
|
1353
|
-
];
|
|
1354
|
-
// Track nodes at each level to maintain parent-child relationships
|
|
1355
|
-
// stores last node at each level
|
|
1356
|
-
const currLevelNodes = {
|
|
1357
|
-
0: tree[0],
|
|
1358
|
-
};
|
|
1359
|
-
for (const item of items) {
|
|
1360
|
-
const node = {
|
|
1361
|
-
id: item.id,
|
|
1362
|
-
parent: null,
|
|
1363
|
-
children: [],
|
|
1364
|
-
meta: item,
|
|
1365
|
-
};
|
|
1366
|
-
// Determine the level of this node
|
|
1367
|
-
const levelKey = Object.keys(item).find((key) => key.startsWith('N') && item[key]);
|
|
1368
|
-
const level = Number(levelKey?.replace('N', '')) || 0;
|
|
1369
|
-
if (!level) {
|
|
1370
|
-
throw new Error(`Invalid level for section ${item.id}`);
|
|
1371
|
-
}
|
|
1372
|
-
// This is a child node - attach to its parent
|
|
1373
|
-
const parentLevel = level - 1;
|
|
1374
|
-
const parentNode = currLevelNodes[parentLevel];
|
|
1375
|
-
if (parentNode) {
|
|
1376
|
-
node.parent = parentNode;
|
|
1377
|
-
parentNode.children.push(node);
|
|
1378
|
-
}
|
|
1379
|
-
else {
|
|
1380
|
-
throw new Error(`Parent node not found for section ${item.id}`);
|
|
1381
|
-
}
|
|
1382
|
-
// Set this as the current node for its level
|
|
1383
|
-
currLevelNodes[level] = node;
|
|
1384
|
-
}
|
|
1385
|
-
// return root nodes children
|
|
1386
|
-
return tree[0].children;
|
|
1280
|
+
const isHTMLElement = (node) => {
|
|
1281
|
+
return node instanceof nodeHtmlParser.HTMLElement;
|
|
1387
1282
|
};
|
|
1388
|
-
const
|
|
1389
|
-
|
|
1390
|
-
const traverse = (node) => {
|
|
1391
|
-
flatten.push(node);
|
|
1392
|
-
for (const child of node.children) {
|
|
1393
|
-
traverse(child);
|
|
1394
|
-
}
|
|
1395
|
-
};
|
|
1396
|
-
// traverse all root nodes and their children
|
|
1397
|
-
for (const node of tree) {
|
|
1398
|
-
traverse(node);
|
|
1399
|
-
}
|
|
1400
|
-
return flatten;
|
|
1283
|
+
const isCommentNode = (node) => {
|
|
1284
|
+
return node instanceof nodeHtmlParser.CommentNode;
|
|
1401
1285
|
};
|
|
1402
|
-
const
|
|
1403
|
-
|
|
1404
|
-
return sections.reduce((acc, section) => {
|
|
1405
|
-
const website = section._website;
|
|
1406
|
-
if (!acc[website])
|
|
1407
|
-
acc[website] = [];
|
|
1408
|
-
acc[website].push(section);
|
|
1409
|
-
return acc;
|
|
1410
|
-
}, {});
|
|
1286
|
+
const nodeTagIs = (node, name) => {
|
|
1287
|
+
return isHTMLElement(node) && node.tagName?.toLowerCase() === name.toLowerCase();
|
|
1411
1288
|
};
|
|
1412
|
-
const
|
|
1413
|
-
return
|
|
1414
|
-
id: s._id,
|
|
1415
|
-
website: s._website,
|
|
1416
|
-
type: 'section',
|
|
1417
|
-
}));
|
|
1289
|
+
const nodeTagIn = (node, names) => {
|
|
1290
|
+
return isHTMLElement(node) && names.includes(node.tagName?.toLowerCase());
|
|
1418
1291
|
};
|
|
1419
|
-
const
|
|
1420
|
-
|
|
1292
|
+
const htmlToText = (html, parseOptions) => {
|
|
1293
|
+
if (!html)
|
|
1294
|
+
return '';
|
|
1295
|
+
const doc = nodeHtmlParser.parse(html, parseOptions);
|
|
1296
|
+
return decodeHTMLEntities(doc.innerText);
|
|
1421
1297
|
};
|
|
1422
|
-
const
|
|
1423
|
-
const
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
}
|
|
1428
|
-
else {
|
|
1429
|
-
map.set(`${s._id}${s._website}`, s);
|
|
1430
|
-
}
|
|
1431
|
-
});
|
|
1432
|
-
return [...map.values()];
|
|
1298
|
+
const getHTMLElementAttribute = (e, key) => {
|
|
1299
|
+
const value = e.getAttribute(key);
|
|
1300
|
+
if (value)
|
|
1301
|
+
return value;
|
|
1302
|
+
return new URLSearchParams(e.rawAttrs.replaceAll(' ', '&')).get(key);
|
|
1433
1303
|
};
|
|
1434
|
-
class SectionsRepository {
|
|
1435
|
-
constructor(arc) {
|
|
1436
|
-
this.arc = arc;
|
|
1437
|
-
this.sectionsByWebsite = {};
|
|
1438
|
-
this.websitesAreLoaded = false;
|
|
1439
|
-
}
|
|
1440
|
-
async put(ans) {
|
|
1441
|
-
await this.arc.Site.putSection(ans);
|
|
1442
|
-
const created = await this.arc.Site.getSection(ans._id, ans.website);
|
|
1443
|
-
this.save(created);
|
|
1444
|
-
}
|
|
1445
|
-
async loadWebsite(website) {
|
|
1446
|
-
const sections = [];
|
|
1447
|
-
let next = true;
|
|
1448
|
-
let offset = 0;
|
|
1449
|
-
while (next) {
|
|
1450
|
-
const migrated = await this.arc.Site.getSections({ website, offset }).catch((_) => {
|
|
1451
|
-
return { q_results: [] };
|
|
1452
|
-
});
|
|
1453
|
-
if (migrated.q_results.length) {
|
|
1454
|
-
sections.push(...migrated.q_results);
|
|
1455
|
-
offset += migrated.q_results.length;
|
|
1456
|
-
}
|
|
1457
|
-
else {
|
|
1458
|
-
next = false;
|
|
1459
|
-
}
|
|
1460
|
-
}
|
|
1461
|
-
return sections;
|
|
1462
|
-
}
|
|
1463
|
-
async loadWebsites(websites) {
|
|
1464
|
-
for (const website of websites) {
|
|
1465
|
-
this.sectionsByWebsite[website] = await this.loadWebsite(website);
|
|
1466
|
-
}
|
|
1467
|
-
this.websitesAreLoaded = true;
|
|
1468
|
-
}
|
|
1469
|
-
save(section) {
|
|
1470
|
-
const website = section._website;
|
|
1471
|
-
assert.ok(website, 'Section must have a website');
|
|
1472
|
-
this.sectionsByWebsite[website] = this.sectionsByWebsite[website] || [];
|
|
1473
|
-
if (!this.sectionsByWebsite[website].find((s) => s._id === section._id)) {
|
|
1474
|
-
this.sectionsByWebsite[website].push(section);
|
|
1475
|
-
}
|
|
1476
|
-
}
|
|
1477
|
-
getById(id, website) {
|
|
1478
|
-
this.ensureWebsitesLoaded();
|
|
1479
|
-
const section = this.sectionsByWebsite[website]?.find((s) => s._id === id);
|
|
1480
|
-
return section;
|
|
1481
|
-
}
|
|
1482
|
-
getByWebsite(website) {
|
|
1483
|
-
this.ensureWebsitesLoaded();
|
|
1484
|
-
return this.sectionsByWebsite[website];
|
|
1485
|
-
}
|
|
1486
|
-
getParentSections(section) {
|
|
1487
|
-
this.ensureWebsitesLoaded();
|
|
1488
|
-
const parents = [];
|
|
1489
|
-
let current = section;
|
|
1490
|
-
while (current.parent?.default && current.parent.default !== '/') {
|
|
1491
|
-
const parent = this.getById(current.parent.default, section._website);
|
|
1492
|
-
if (!parent)
|
|
1493
|
-
break;
|
|
1494
|
-
parents.push(parent);
|
|
1495
|
-
current = parent;
|
|
1496
|
-
}
|
|
1497
|
-
return parents;
|
|
1498
|
-
}
|
|
1499
|
-
ensureWebsitesLoaded() {
|
|
1500
|
-
assert.ok(this.websitesAreLoaded, 'call .loadWebsites() first');
|
|
1501
|
-
}
|
|
1502
|
-
}
|
|
1503
1304
|
|
|
1504
|
-
var
|
|
1305
|
+
var html_utils = /*#__PURE__*/Object.freeze({
|
|
1505
1306
|
__proto__: null,
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
removeDuplicates: removeDuplicates
|
|
1307
|
+
getHTMLElementAttribute: getHTMLElementAttribute,
|
|
1308
|
+
htmlToText: htmlToText,
|
|
1309
|
+
isCommentNode: isCommentNode,
|
|
1310
|
+
isHTMLElement: isHTMLElement,
|
|
1311
|
+
isTextNode: isTextNode$1,
|
|
1312
|
+
nodeTagIn: nodeTagIn,
|
|
1313
|
+
nodeTagIs: nodeTagIs
|
|
1514
1314
|
});
|
|
1515
1315
|
|
|
1516
|
-
const ArcUtils = {
|
|
1517
|
-
Id,
|
|
1518
|
-
ANS,
|
|
1519
|
-
ContentElements,
|
|
1520
|
-
Section,
|
|
1521
|
-
};
|
|
1522
|
-
|
|
1523
1316
|
/**
|
|
1524
|
-
*
|
|
1525
|
-
*
|
|
1317
|
+
* HTMLProcessor is responsible for parsing HTML content into structured content elements.
|
|
1318
|
+
* It provides a flexible way to handle different HTML nodes and wrap text content.
|
|
1526
1319
|
*
|
|
1527
|
-
*
|
|
1528
|
-
*
|
|
1529
|
-
* Then you can override the specific methods to enrich the story with the data from BBC
|
|
1320
|
+
* The processor can be extended with custom handlers for specific node types and
|
|
1321
|
+
* wrappers for text content.
|
|
1530
1322
|
*
|
|
1531
|
-
*
|
|
1323
|
+
* @example
|
|
1324
|
+
* ```ts
|
|
1325
|
+
* // Create and initialize processor
|
|
1326
|
+
* const processor = new HTMLProcessor();
|
|
1327
|
+
* processor.init();
|
|
1328
|
+
*
|
|
1329
|
+
* // Parse HTML content
|
|
1330
|
+
* const html = '<div><p>Some text</p><img src="image.jpg"></div>';
|
|
1331
|
+
* const elements = await processor.parse(html);
|
|
1332
|
+
* ```
|
|
1333
|
+
*
|
|
1334
|
+
* The processor comes with built-in handlers for common HTML elements like links,
|
|
1335
|
+
* text formatting (i, u, strong), and block elements. Custom handlers can be added
|
|
1336
|
+
* using the `handle()` and `wrap()` methods.
|
|
1532
1337
|
*/
|
|
1533
|
-
class
|
|
1338
|
+
class HTMLProcessor {
|
|
1339
|
+
constructor() {
|
|
1340
|
+
this.parallelProcessing = true;
|
|
1341
|
+
this.handlers = {
|
|
1342
|
+
node: new Map(),
|
|
1343
|
+
wrap: new Map(),
|
|
1344
|
+
};
|
|
1345
|
+
}
|
|
1346
|
+
init() {
|
|
1347
|
+
// wrappers are used to wrap the content of nested text nodes
|
|
1348
|
+
// in a specific way
|
|
1349
|
+
this.wrap('link', (node, text) => {
|
|
1350
|
+
if (nodeTagIn(node, ['a'])) {
|
|
1351
|
+
const attributes = ['href', 'target', 'rel']
|
|
1352
|
+
.map((attr) => [attr, getHTMLElementAttribute(node, attr)])
|
|
1353
|
+
.filter(([_, value]) => value)
|
|
1354
|
+
.map(([key, value]) => `${key}="${value}"`)
|
|
1355
|
+
.join(' ');
|
|
1356
|
+
return {
|
|
1357
|
+
...text,
|
|
1358
|
+
content: `<a ${attributes}>${text.content}</a>`,
|
|
1359
|
+
};
|
|
1360
|
+
}
|
|
1361
|
+
});
|
|
1362
|
+
this.wrap('i', (node, text) => {
|
|
1363
|
+
if (nodeTagIn(node, ['i'])) {
|
|
1364
|
+
return {
|
|
1365
|
+
...text,
|
|
1366
|
+
content: `<i>${text.content}</i>`,
|
|
1367
|
+
};
|
|
1368
|
+
}
|
|
1369
|
+
});
|
|
1370
|
+
this.wrap('u', (node, text) => {
|
|
1371
|
+
if (nodeTagIn(node, ['u'])) {
|
|
1372
|
+
return {
|
|
1373
|
+
...text,
|
|
1374
|
+
content: `<u>${text.content}</u>`,
|
|
1375
|
+
};
|
|
1376
|
+
}
|
|
1377
|
+
});
|
|
1378
|
+
this.wrap('sup/sub', (node, text) => {
|
|
1379
|
+
if (nodeTagIn(node, ['sup', 'sub'])) {
|
|
1380
|
+
return {
|
|
1381
|
+
...text,
|
|
1382
|
+
content: `<mark class="${node.tagName.toLowerCase()}">${text.content}</mark>`,
|
|
1383
|
+
};
|
|
1384
|
+
}
|
|
1385
|
+
});
|
|
1386
|
+
this.wrap('strong', (node, text) => {
|
|
1387
|
+
if (nodeTagIn(node, ['strong', 'b'])) {
|
|
1388
|
+
return {
|
|
1389
|
+
...text,
|
|
1390
|
+
content: `<b>${text.content}</b>`,
|
|
1391
|
+
};
|
|
1392
|
+
}
|
|
1393
|
+
});
|
|
1394
|
+
this.wrap('center', (node, text) => {
|
|
1395
|
+
if (nodeTagIn(node, ['center'])) {
|
|
1396
|
+
return {
|
|
1397
|
+
...text,
|
|
1398
|
+
alignment: 'center',
|
|
1399
|
+
};
|
|
1400
|
+
}
|
|
1401
|
+
});
|
|
1402
|
+
this.wrap('aligned-paragraph', (node, text) => {
|
|
1403
|
+
if (nodeTagIn(node, ['p'])) {
|
|
1404
|
+
const styleAttribute = getHTMLElementAttribute(node, 'style') || '';
|
|
1405
|
+
if (!styleAttribute)
|
|
1406
|
+
return text;
|
|
1407
|
+
if (styleAttribute.includes('text-align: right;')) {
|
|
1408
|
+
return {
|
|
1409
|
+
...text,
|
|
1410
|
+
alignment: 'right',
|
|
1411
|
+
};
|
|
1412
|
+
}
|
|
1413
|
+
if (styleAttribute.includes('text-align: left;')) {
|
|
1414
|
+
return {
|
|
1415
|
+
...text,
|
|
1416
|
+
alignment: 'left',
|
|
1417
|
+
};
|
|
1418
|
+
}
|
|
1419
|
+
if (styleAttribute.includes('text-align: center;')) {
|
|
1420
|
+
return {
|
|
1421
|
+
...text,
|
|
1422
|
+
alignment: 'center',
|
|
1423
|
+
};
|
|
1424
|
+
}
|
|
1425
|
+
return text;
|
|
1426
|
+
}
|
|
1427
|
+
});
|
|
1428
|
+
// handlers are used to handle specific nodes
|
|
1429
|
+
// and return a list of content elements
|
|
1430
|
+
this.handle('default', (node) => {
|
|
1431
|
+
const noTag = isHTMLElement(node) && !node.tagName;
|
|
1432
|
+
if (noTag ||
|
|
1433
|
+
nodeTagIn(node, [
|
|
1434
|
+
'p',
|
|
1435
|
+
'a',
|
|
1436
|
+
'b',
|
|
1437
|
+
'sup',
|
|
1438
|
+
'sub',
|
|
1439
|
+
'span',
|
|
1440
|
+
'strong',
|
|
1441
|
+
'em',
|
|
1442
|
+
'i',
|
|
1443
|
+
'u',
|
|
1444
|
+
'section',
|
|
1445
|
+
'main',
|
|
1446
|
+
'div',
|
|
1447
|
+
'li',
|
|
1448
|
+
'center',
|
|
1449
|
+
])) {
|
|
1450
|
+
return this.handleNested(node);
|
|
1451
|
+
}
|
|
1452
|
+
});
|
|
1453
|
+
this.handle('headers', (node) => {
|
|
1454
|
+
if (nodeTagIn(node, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
|
|
1455
|
+
return this.createHeader(node);
|
|
1456
|
+
}
|
|
1457
|
+
});
|
|
1458
|
+
this.handle('text', (node) => {
|
|
1459
|
+
if (isTextNode$1(node)) {
|
|
1460
|
+
return this.createText(node);
|
|
1461
|
+
}
|
|
1462
|
+
});
|
|
1463
|
+
this.handle('comment', (node) => {
|
|
1464
|
+
if (isCommentNode(node)) {
|
|
1465
|
+
return this.handleComment(node);
|
|
1466
|
+
}
|
|
1467
|
+
});
|
|
1468
|
+
this.handle('list', async (node) => {
|
|
1469
|
+
if (nodeTagIn(node, ['ul', 'ol'])) {
|
|
1470
|
+
const listType = node.tagName === 'UL' ? 'unordered' : 'ordered';
|
|
1471
|
+
return this.createList(node, listType);
|
|
1472
|
+
}
|
|
1473
|
+
});
|
|
1474
|
+
this.handle('table', (node) => {
|
|
1475
|
+
if (nodeTagIs(node, 'table')) {
|
|
1476
|
+
return this.handleTable(node);
|
|
1477
|
+
}
|
|
1478
|
+
});
|
|
1479
|
+
this.handle('iframe', (node) => {
|
|
1480
|
+
if (nodeTagIs(node, 'iframe')) {
|
|
1481
|
+
return this.handleIframe(node);
|
|
1482
|
+
}
|
|
1483
|
+
});
|
|
1484
|
+
this.handle('img', (node) => {
|
|
1485
|
+
if (nodeTagIs(node, 'img')) {
|
|
1486
|
+
return this.handleImage(node);
|
|
1487
|
+
}
|
|
1488
|
+
});
|
|
1489
|
+
this.handle('br', (node) => {
|
|
1490
|
+
if (nodeTagIs(node, 'br')) {
|
|
1491
|
+
return this.handleBreak(node);
|
|
1492
|
+
}
|
|
1493
|
+
});
|
|
1494
|
+
}
|
|
1495
|
+
handle(name, handler) {
|
|
1496
|
+
if (this.handlers.node.has(name)) {
|
|
1497
|
+
this.warn({ name }, `${name} node handler already set`);
|
|
1498
|
+
}
|
|
1499
|
+
this.handlers.node.set(name, handler);
|
|
1500
|
+
}
|
|
1501
|
+
wrap(name, handler) {
|
|
1502
|
+
if (this.handlers.wrap.has(name)) {
|
|
1503
|
+
this.warn({ name }, `${name} wrap handler already set`);
|
|
1504
|
+
}
|
|
1505
|
+
this.handlers.wrap.set(name, handler);
|
|
1506
|
+
}
|
|
1507
|
+
async parse(html) {
|
|
1508
|
+
const doc = nodeHtmlParser.parse(html, { comment: true });
|
|
1509
|
+
doc.removeWhitespace();
|
|
1510
|
+
const elements = await this.process(doc);
|
|
1511
|
+
const filtered = elements?.filter((e) => e.type !== 'divider');
|
|
1512
|
+
return filtered || [];
|
|
1513
|
+
}
|
|
1514
|
+
addTextAdditionalProperties(c, parent) {
|
|
1515
|
+
const additionalProperties = c.additional_properties || {};
|
|
1516
|
+
const parentNodeIsBlockElement = this.isBlockElement(parent);
|
|
1517
|
+
c.additional_properties = {
|
|
1518
|
+
...c.additional_properties,
|
|
1519
|
+
isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
|
|
1520
|
+
};
|
|
1521
|
+
return c;
|
|
1522
|
+
}
|
|
1523
|
+
/**
|
|
1524
|
+
* Wraps text content elements with additional properties and handlers.
|
|
1525
|
+
* This method iterates through an array of content elements and applies
|
|
1526
|
+
* wrappers to text elements.
|
|
1527
|
+
*
|
|
1528
|
+
* @param node - The HTML node containing the text elements
|
|
1529
|
+
**/
|
|
1530
|
+
wrapChildrenTextNodes(node, elements) {
|
|
1531
|
+
const wrapped = [];
|
|
1532
|
+
const wrappers = [...this.handlers.wrap.values()];
|
|
1533
|
+
for (const c of elements) {
|
|
1534
|
+
if (!isTextCE(c)) {
|
|
1535
|
+
wrapped.push(c);
|
|
1536
|
+
continue;
|
|
1537
|
+
}
|
|
1538
|
+
this.addTextAdditionalProperties(c, node);
|
|
1539
|
+
const handled = wrappers.map((wrapper) => wrapper(node, c)).find(Boolean);
|
|
1540
|
+
wrapped.push(handled || c);
|
|
1541
|
+
}
|
|
1542
|
+
return wrapped;
|
|
1543
|
+
}
|
|
1544
|
+
/**
|
|
1545
|
+
* Handles nested nodes by processing their children and merging text elements.
|
|
1546
|
+
* This method recursively processes the children of a given HTML node and
|
|
1547
|
+
* returns a list of content elements.
|
|
1548
|
+
*
|
|
1549
|
+
* @param node - The HTML node to process
|
|
1550
|
+
**/
|
|
1551
|
+
async handleNested(node) {
|
|
1552
|
+
const children = await this.processChildNodes(node);
|
|
1553
|
+
const filtered = children.filter(Boolean).flat();
|
|
1554
|
+
const merged = this.mergeParagraphs(filtered);
|
|
1555
|
+
const wrapped = this.wrapChildrenTextNodes(node, merged);
|
|
1556
|
+
return wrapped;
|
|
1557
|
+
}
|
|
1558
|
+
async processChildNodes(node) {
|
|
1559
|
+
if (this.parallelProcessing) {
|
|
1560
|
+
return await Promise.all(node.childNodes.map((child) => this.process(child)));
|
|
1561
|
+
}
|
|
1562
|
+
const children = [];
|
|
1563
|
+
for (const child of node.childNodes) {
|
|
1564
|
+
children.push(await this.process(child));
|
|
1565
|
+
}
|
|
1566
|
+
return children;
|
|
1567
|
+
}
|
|
1568
|
+
/**
|
|
1569
|
+
* Processes a single HTML node and converts it into content elements.
|
|
1570
|
+
* This method iterates through registered node handlers and attempts to process the node.
|
|
1571
|
+
* If a handler successfully processes the node, it returns an array of content elements.
|
|
1572
|
+
*
|
|
1573
|
+
* @param node - The HTML node to process
|
|
1574
|
+
* @returns Promise resolving to an array of content elements, or undefined if node cannot be processed
|
|
1575
|
+
*/
|
|
1576
|
+
async process(node) {
|
|
1577
|
+
let isKnownNode = false;
|
|
1578
|
+
const elements = [];
|
|
1579
|
+
for (const [name, handler] of this.handlers.node.entries()) {
|
|
1580
|
+
try {
|
|
1581
|
+
const result = await handler(node);
|
|
1582
|
+
if (result) {
|
|
1583
|
+
// if handler returns an array of elements, it means that the node was handled properly, even if there is no elements inside
|
|
1584
|
+
isKnownNode = true;
|
|
1585
|
+
elements.push(...result);
|
|
1586
|
+
break;
|
|
1587
|
+
}
|
|
1588
|
+
}
|
|
1589
|
+
catch (error) {
|
|
1590
|
+
this.warn({ node: node.toString(), error: error.toString(), name }, 'HandlerError');
|
|
1591
|
+
}
|
|
1592
|
+
}
|
|
1593
|
+
if (isKnownNode)
|
|
1594
|
+
return elements;
|
|
1595
|
+
this.warn({ node: node.toString() }, 'UnknownNodeError');
|
|
1596
|
+
}
|
|
1597
|
+
/**
|
|
1598
|
+
* Merges adjacent text content elements into a single paragraph.
|
|
1599
|
+
* This method iterates through an array of content elements and combines
|
|
1600
|
+
* adjacent text elements into a single paragraph.
|
|
1601
|
+
*
|
|
1602
|
+
* @param items - The array of content elements to merge
|
|
1603
|
+
**/
|
|
1604
|
+
mergeParagraphs(items) {
|
|
1605
|
+
const merged = [];
|
|
1606
|
+
let toMerge = [];
|
|
1607
|
+
const merge = () => {
|
|
1608
|
+
if (!toMerge.length)
|
|
1609
|
+
return;
|
|
1610
|
+
const paragraph = toMerge.reduce((acc, p) => {
|
|
1611
|
+
return {
|
|
1612
|
+
...p,
|
|
1613
|
+
content: acc.content + p.content,
|
|
1614
|
+
};
|
|
1615
|
+
}, { type: 'text', content: '' });
|
|
1616
|
+
merged.push(paragraph);
|
|
1617
|
+
toMerge = [];
|
|
1618
|
+
};
|
|
1619
|
+
for (let i = 0; i < items.length; i++) {
|
|
1620
|
+
const item = items[i];
|
|
1621
|
+
const isBlockElement = item.additional_properties?.isBlockElement;
|
|
1622
|
+
if (isTextCE(item) && !isBlockElement) {
|
|
1623
|
+
toMerge.push(item);
|
|
1624
|
+
}
|
|
1625
|
+
else {
|
|
1626
|
+
merge();
|
|
1627
|
+
merged.push(item);
|
|
1628
|
+
}
|
|
1629
|
+
}
|
|
1630
|
+
merge();
|
|
1631
|
+
return merged;
|
|
1632
|
+
}
|
|
1633
|
+
handleComment(_) {
|
|
1634
|
+
return [];
|
|
1635
|
+
}
|
|
1636
|
+
async handleTable(node) {
|
|
1637
|
+
return [ContentElement$1.raw_html(node.toString())];
|
|
1638
|
+
}
|
|
1639
|
+
async handleIframe(node) {
|
|
1640
|
+
return [ContentElement$1.raw_html(node.toString())];
|
|
1641
|
+
}
|
|
1642
|
+
async handleImage(node) {
|
|
1643
|
+
return [ContentElement$1.raw_html(node.toString())];
|
|
1644
|
+
}
|
|
1645
|
+
async handleBreak(_) {
|
|
1646
|
+
return [ContentElement$1.divider()];
|
|
1647
|
+
}
|
|
1648
|
+
async createQuote(node) {
|
|
1649
|
+
const items = await this.handleNested(node);
|
|
1650
|
+
return [ContentElement$1.quote(items)];
|
|
1651
|
+
}
|
|
1652
|
+
async createText(node) {
|
|
1653
|
+
const text = ContentElement$1.text(node.text);
|
|
1654
|
+
return [text];
|
|
1655
|
+
}
|
|
1656
|
+
filterListItems(items) {
|
|
1657
|
+
return items.filter((i) => ['text', 'list'].includes(i.type));
|
|
1658
|
+
}
|
|
1659
|
+
async createList(node, type) {
|
|
1660
|
+
const items = await this.handleNested(node);
|
|
1661
|
+
return [ContentElement$1.list(type, this.filterListItems(items))];
|
|
1662
|
+
}
|
|
1663
|
+
async createHeader(node) {
|
|
1664
|
+
const level = +node.tagName.split('H')[1] || 3;
|
|
1665
|
+
return [ContentElement$1.header(node.innerText, level)];
|
|
1666
|
+
}
|
|
1667
|
+
isBlockElement(node) {
|
|
1668
|
+
if (!isHTMLElement(node))
|
|
1669
|
+
return false;
|
|
1670
|
+
const defaultBlockElements = new Set(BLOCK_ELEMENT_TAGS$1);
|
|
1671
|
+
return defaultBlockElements.has(node.tagName);
|
|
1672
|
+
}
|
|
1673
|
+
warn(metadata, message) {
|
|
1674
|
+
console.warn(metadata, message);
|
|
1675
|
+
}
|
|
1676
|
+
}
|
|
1677
|
+
|
|
1678
|
+
var index$4 = /*#__PURE__*/Object.freeze({
|
|
1679
|
+
__proto__: null,
|
|
1680
|
+
Constants: html_constants,
|
|
1681
|
+
HTMLProcessor: HTMLProcessor,
|
|
1682
|
+
Utils: html_utils
|
|
1683
|
+
});
|
|
1684
|
+
|
|
1685
|
+
const BLOCK_ELEMENT_TAGS = ['paragraph', 'line', 'header', 'ul', 'ol', 'li', 'embed', 'iframe', 'table'];
|
|
1686
|
+
|
|
1687
|
+
var xml_constants = /*#__PURE__*/Object.freeze({
|
|
1688
|
+
__proto__: null,
|
|
1689
|
+
BLOCK_ELEMENT_TAGS: BLOCK_ELEMENT_TAGS
|
|
1690
|
+
});
|
|
1691
|
+
|
|
1692
|
+
const isXmlElement = (node) => {
|
|
1693
|
+
return node?.type === 'element';
|
|
1694
|
+
};
|
|
1695
|
+
const isTextNode = (node) => {
|
|
1696
|
+
return node?.type === 'text';
|
|
1697
|
+
};
|
|
1698
|
+
const nodeNameIs = (node, name) => {
|
|
1699
|
+
return isXmlElement(node) && node.name === name;
|
|
1700
|
+
};
|
|
1701
|
+
const nodeNameIn = (node, names) => {
|
|
1702
|
+
return isXmlElement(node) && names.includes(node.name);
|
|
1703
|
+
};
|
|
1704
|
+
|
|
1705
|
+
var xml_utils = /*#__PURE__*/Object.freeze({
|
|
1706
|
+
__proto__: null,
|
|
1707
|
+
isTextNode: isTextNode,
|
|
1708
|
+
isXmlElement: isXmlElement,
|
|
1709
|
+
nodeNameIn: nodeNameIn,
|
|
1710
|
+
nodeNameIs: nodeNameIs
|
|
1711
|
+
});
|
|
1712
|
+
|
|
1713
|
+
const ContentElement = ContentElement$1;
|
|
1714
|
+
class XMLProcessor {
|
|
1715
|
+
constructor() {
|
|
1716
|
+
this.handlers = {
|
|
1717
|
+
node: new Map(),
|
|
1718
|
+
wrap: new Map(),
|
|
1719
|
+
};
|
|
1720
|
+
}
|
|
1721
|
+
init() {
|
|
1722
|
+
// wrappers are used to wrap the content of nested text nodes
|
|
1723
|
+
// in a specific way
|
|
1724
|
+
this.wrap('link', (node, content) => {
|
|
1725
|
+
return `<a href="${node.attr.url || node.attr.href || '/'}">${content}</a>`;
|
|
1726
|
+
});
|
|
1727
|
+
this.wrap('header', (_node, content) => {
|
|
1728
|
+
return `<h3>${content}</h3>`;
|
|
1729
|
+
});
|
|
1730
|
+
this.wrap('emphasize', (_node, content) => {
|
|
1731
|
+
return `<i>${content}</i>`;
|
|
1732
|
+
});
|
|
1733
|
+
this.wrap('strong', (_node, content) => {
|
|
1734
|
+
return `<b>${content}</b>`;
|
|
1735
|
+
});
|
|
1736
|
+
// handlers are used to handle specific nodes
|
|
1737
|
+
// and return a list of content elements
|
|
1738
|
+
this.handle('default', (node) => {
|
|
1739
|
+
if (nodeNameIn(node, ['section', 'paragraph', 'line', 'header', 'emphasize', 'strong', 'link', 'li'])) {
|
|
1740
|
+
return this.handleNested(node);
|
|
1741
|
+
}
|
|
1742
|
+
});
|
|
1743
|
+
this.handle('text', (node) => {
|
|
1744
|
+
if (isTextNode(node)) {
|
|
1745
|
+
return [ContentElement.text(node.text)];
|
|
1746
|
+
}
|
|
1747
|
+
});
|
|
1748
|
+
this.handle('list', async (node) => {
|
|
1749
|
+
if (nodeNameIn(node, ['ul', 'ol'])) {
|
|
1750
|
+
const listType = node.name === 'ul' ? 'unordered' : 'ordered';
|
|
1751
|
+
return this.createList(node, listType);
|
|
1752
|
+
}
|
|
1753
|
+
});
|
|
1754
|
+
this.handle('table', (node) => {
|
|
1755
|
+
if (nodeNameIs(node, 'table')) {
|
|
1756
|
+
return this.handleTable(node);
|
|
1757
|
+
}
|
|
1758
|
+
});
|
|
1759
|
+
}
|
|
1760
|
+
async parse(xml) {
|
|
1761
|
+
const doc = new xmldoc__namespace.XmlDocument(xml);
|
|
1762
|
+
const elements = await this.process(doc);
|
|
1763
|
+
return elements || [];
|
|
1764
|
+
}
|
|
1765
|
+
handle(name, handler) {
|
|
1766
|
+
if (this.handlers.node.has(name)) {
|
|
1767
|
+
throw new Error(`${name} node handler already set`);
|
|
1768
|
+
}
|
|
1769
|
+
this.handlers.node.set(name, handler);
|
|
1770
|
+
}
|
|
1771
|
+
wrap(name, handler) {
|
|
1772
|
+
if (this.handlers.wrap.has(name)) {
|
|
1773
|
+
throw new Error(`${name} wrap handler already set`);
|
|
1774
|
+
}
|
|
1775
|
+
this.handlers.wrap.set(name, handler);
|
|
1776
|
+
}
|
|
1777
|
+
addTextAdditionalProperties(c, parent) {
|
|
1778
|
+
const additionalProperties = c.additional_properties || {};
|
|
1779
|
+
const parentNodeIsBlockElement = this.isBlockElement(parent);
|
|
1780
|
+
c.additional_properties = {
|
|
1781
|
+
...c.additional_properties,
|
|
1782
|
+
isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
|
|
1783
|
+
};
|
|
1784
|
+
return c;
|
|
1785
|
+
}
|
|
1786
|
+
wrapChildrenTextNodes(node, elements) {
|
|
1787
|
+
const wrapped = [];
|
|
1788
|
+
for (const c of elements) {
|
|
1789
|
+
if (!isTextCE(c)) {
|
|
1790
|
+
wrapped.push(c);
|
|
1791
|
+
continue;
|
|
1792
|
+
}
|
|
1793
|
+
this.addTextAdditionalProperties(c, node);
|
|
1794
|
+
const handler = this.handlers.wrap.get(node.name);
|
|
1795
|
+
if (handler) {
|
|
1796
|
+
wrapped.push({
|
|
1797
|
+
...c,
|
|
1798
|
+
content: handler(node, c.content),
|
|
1799
|
+
});
|
|
1800
|
+
}
|
|
1801
|
+
else {
|
|
1802
|
+
wrapped.push(c);
|
|
1803
|
+
}
|
|
1804
|
+
}
|
|
1805
|
+
return wrapped;
|
|
1806
|
+
}
|
|
1807
|
+
async handleNested(node) {
|
|
1808
|
+
const children = await Promise.all(node.children.map((child) => this.process(child)));
|
|
1809
|
+
const filtered = children.filter(Boolean).flat();
|
|
1810
|
+
const merged = this.mergeParagraphs(filtered);
|
|
1811
|
+
const wrapped = this.wrapChildrenTextNodes(node, merged);
|
|
1812
|
+
return wrapped;
|
|
1813
|
+
}
|
|
1814
|
+
async process(node) {
|
|
1815
|
+
let isKnownNode = false;
|
|
1816
|
+
const elements = [];
|
|
1817
|
+
for (const [name, handler] of this.handlers.node.entries()) {
|
|
1818
|
+
try {
|
|
1819
|
+
const result = await handler(node);
|
|
1820
|
+
if (Array.isArray(result)) {
|
|
1821
|
+
// if handler returns an array of elements, it means that the node was handled properly, even if there is no elements inside
|
|
1822
|
+
isKnownNode = true;
|
|
1823
|
+
elements.push(...result);
|
|
1824
|
+
break;
|
|
1825
|
+
}
|
|
1826
|
+
}
|
|
1827
|
+
catch (error) {
|
|
1828
|
+
this.warn({ node: node.toString(), error: error.toString(), name }, 'HandlerError');
|
|
1829
|
+
}
|
|
1830
|
+
}
|
|
1831
|
+
if (isKnownNode)
|
|
1832
|
+
return elements;
|
|
1833
|
+
this.warn({ node: node.toString(), type: node.type }, 'UnknownNodeError');
|
|
1834
|
+
}
|
|
1835
|
+
mergeParagraphs(items) {
|
|
1836
|
+
const merged = [];
|
|
1837
|
+
let toMerge = [];
|
|
1838
|
+
const merge = () => {
|
|
1839
|
+
if (!toMerge.length)
|
|
1840
|
+
return;
|
|
1841
|
+
const paragraph = toMerge.reduce((acc, p) => {
|
|
1842
|
+
return {
|
|
1843
|
+
...p,
|
|
1844
|
+
content: acc.content + p.content,
|
|
1845
|
+
};
|
|
1846
|
+
}, { type: 'text', content: '' });
|
|
1847
|
+
merged.push(paragraph);
|
|
1848
|
+
toMerge = [];
|
|
1849
|
+
};
|
|
1850
|
+
for (let i = 0; i < items.length; i++) {
|
|
1851
|
+
const item = items[i];
|
|
1852
|
+
const isBlockElement = item.additional_properties?.isBlockElement;
|
|
1853
|
+
if (isTextCE(item) && !isBlockElement) {
|
|
1854
|
+
toMerge.push(item);
|
|
1855
|
+
}
|
|
1856
|
+
else {
|
|
1857
|
+
merge();
|
|
1858
|
+
merged.push(item);
|
|
1859
|
+
}
|
|
1860
|
+
}
|
|
1861
|
+
merge();
|
|
1862
|
+
return merged;
|
|
1863
|
+
}
|
|
1864
|
+
async handleTable(node) {
|
|
1865
|
+
const html = node.toString({ html: true });
|
|
1866
|
+
return [ContentElement.raw_html(html)];
|
|
1867
|
+
}
|
|
1868
|
+
async createQuote(node) {
|
|
1869
|
+
const items = await this.handleNested(node);
|
|
1870
|
+
return [ContentElement.quote(items)];
|
|
1871
|
+
}
|
|
1872
|
+
async createList(node, type) {
|
|
1873
|
+
const items = await this.handleNested(node);
|
|
1874
|
+
return [ContentElement.list(type, items)];
|
|
1875
|
+
}
|
|
1876
|
+
getNodeInnerText(node) {
|
|
1877
|
+
return node.children.map((n) => this.htmlFromNode(n).innerText.trim());
|
|
1878
|
+
}
|
|
1879
|
+
getNodeInnerHTML(node) {
|
|
1880
|
+
return node.children.map((n) => this.htmlFromNode(n).innerHTML.trim());
|
|
1881
|
+
}
|
|
1882
|
+
htmlFromNode(node) {
|
|
1883
|
+
return nodeHtmlParser.parse(node.toString({ html: true }));
|
|
1884
|
+
}
|
|
1885
|
+
getDecodedHTMLFromInnerNodes(node) {
|
|
1886
|
+
const encodedHtml = this.getNodeInnerText(node).join('');
|
|
1887
|
+
const decoded = decodeHTMLEntities(encodedHtml);
|
|
1888
|
+
return decoded;
|
|
1889
|
+
}
|
|
1890
|
+
isBlockElement(node) {
|
|
1891
|
+
const defaultBlockElements = new Set(BLOCK_ELEMENT_TAGS);
|
|
1892
|
+
if (defaultBlockElements.has(node.name))
|
|
1893
|
+
return true;
|
|
1894
|
+
}
|
|
1895
|
+
warn(metadata, message) {
|
|
1896
|
+
console.warn(metadata, message);
|
|
1897
|
+
}
|
|
1898
|
+
}
|
|
1899
|
+
|
|
1900
|
+
var index$3 = /*#__PURE__*/Object.freeze({
|
|
1901
|
+
__proto__: null,
|
|
1902
|
+
Constants: xml_constants,
|
|
1903
|
+
Utils: xml_utils,
|
|
1904
|
+
XMLProcessor: XMLProcessor
|
|
1905
|
+
});
|
|
1906
|
+
|
|
1907
|
+
var index$2 = /*#__PURE__*/Object.freeze({
|
|
1908
|
+
__proto__: null,
|
|
1909
|
+
ContentElement: ContentElement$1,
|
|
1910
|
+
HTML: index$4,
|
|
1911
|
+
XML: index$3
|
|
1912
|
+
});
|
|
1913
|
+
|
|
1914
|
+
/**
|
|
1915
|
+
* Base class for all arc entities, it provides common methods and properties
|
|
1916
|
+
* If you want to create a new entity subtype you should extend this class
|
|
1917
|
+
*
|
|
1918
|
+
* Use case: You want to migrate stories from BBC
|
|
1919
|
+
* You define `class BBCStory extends ArcDocument<ANS.AStory>` and implement all abstract methods
|
|
1920
|
+
* Then you can override the specific methods to enrich the story with the data from BBC
|
|
1921
|
+
*
|
|
1922
|
+
* To migrate it call .migrate() method
|
|
1923
|
+
*/
|
|
1924
|
+
class Document {
|
|
1534
1925
|
constructor() {
|
|
1535
1926
|
this.ans = null;
|
|
1536
1927
|
this.circulations = [];
|
|
@@ -1768,480 +2159,314 @@ class Story extends Document {
|
|
|
1768
2159
|
}
|
|
1769
2160
|
}
|
|
1770
2161
|
|
|
1771
|
-
var index$
|
|
2162
|
+
var index$1 = /*#__PURE__*/Object.freeze({
|
|
1772
2163
|
__proto__: null,
|
|
1773
2164
|
Document: Document,
|
|
1774
2165
|
Story: Story
|
|
1775
2166
|
});
|
|
1776
2167
|
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
];
|
|
2168
|
+
var ANSType;
|
|
2169
|
+
(function (ANSType) {
|
|
2170
|
+
ANSType["Story"] = "story";
|
|
2171
|
+
ANSType["Video"] = "video";
|
|
2172
|
+
ANSType["Tag"] = "tag";
|
|
2173
|
+
ANSType["Author"] = "author";
|
|
2174
|
+
ANSType["Gallery"] = "gallery";
|
|
2175
|
+
ANSType["Image"] = "image";
|
|
2176
|
+
ANSType["Redirect"] = "redirect";
|
|
2177
|
+
})(ANSType || (ANSType = {}));
|
|
2178
|
+
var MigrationStatus;
|
|
2179
|
+
(function (MigrationStatus) {
|
|
2180
|
+
MigrationStatus["Success"] = "Success";
|
|
2181
|
+
MigrationStatus["Queued"] = "Queued";
|
|
2182
|
+
MigrationStatus["Circulated"] = "Circulated";
|
|
2183
|
+
MigrationStatus["Published"] = "Published";
|
|
2184
|
+
MigrationStatus["Scheduled"] = "Scheduled";
|
|
2185
|
+
MigrationStatus["FailVideo"] = "FailVideo";
|
|
2186
|
+
MigrationStatus["FailImage"] = "FailImage";
|
|
2187
|
+
MigrationStatus["FailPhoto"] = "FailPhoto";
|
|
2188
|
+
MigrationStatus["FailStory"] = "FailStory";
|
|
2189
|
+
MigrationStatus["FailGallery"] = "FailGallery";
|
|
2190
|
+
MigrationStatus["FailAuthor"] = "FailAuthor";
|
|
2191
|
+
MigrationStatus["FailTag"] = "FailTag";
|
|
2192
|
+
MigrationStatus["ValidationFailed"] = "ValidationFailed";
|
|
2193
|
+
})(MigrationStatus || (MigrationStatus = {}));
|
|
2194
|
+
var SummarySortBy;
|
|
2195
|
+
(function (SummarySortBy) {
|
|
2196
|
+
SummarySortBy["CreateDate"] = "createDate";
|
|
2197
|
+
SummarySortBy["UpdateDate"] = "updateDate";
|
|
2198
|
+
SummarySortBy["Id"] = "id";
|
|
2199
|
+
})(SummarySortBy || (SummarySortBy = {}));
|
|
2200
|
+
var SummarySortOrder;
|
|
2201
|
+
(function (SummarySortOrder) {
|
|
2202
|
+
SummarySortOrder["ASC"] = "ASC";
|
|
2203
|
+
SummarySortOrder["DESC"] = "DESC";
|
|
2204
|
+
})(SummarySortOrder || (SummarySortOrder = {}));
|
|
1813
2205
|
|
|
1814
|
-
|
|
2206
|
+
/* eslint-disable */
|
|
2207
|
+
/**
|
|
2208
|
+
* This file was automatically generated by json-schema-to-typescript.
|
|
2209
|
+
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
|
|
2210
|
+
* and run json-schema-to-typescript to regenerate this file.
|
|
2211
|
+
*/
|
|
2212
|
+
|
|
2213
|
+
var ansTypes = /*#__PURE__*/Object.freeze({
|
|
2214
|
+
__proto__: null
|
|
2215
|
+
});
|
|
2216
|
+
|
|
2217
|
+
var utils = /*#__PURE__*/Object.freeze({
|
|
2218
|
+
__proto__: null
|
|
2219
|
+
});
|
|
2220
|
+
|
|
2221
|
+
var index = /*#__PURE__*/Object.freeze({
|
|
1815
2222
|
__proto__: null,
|
|
1816
|
-
|
|
2223
|
+
ANS: ansTypes,
|
|
2224
|
+
get ANSType () { return ANSType; },
|
|
2225
|
+
get MigrationStatus () { return MigrationStatus; },
|
|
2226
|
+
get SummarySortBy () { return SummarySortBy; },
|
|
2227
|
+
get SummarySortOrder () { return SummarySortOrder; },
|
|
2228
|
+
TypeUtils: utils
|
|
1817
2229
|
});
|
|
1818
2230
|
|
|
1819
|
-
const
|
|
1820
|
-
return
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
};
|
|
1828
|
-
const nodeTagIs = (node, name) => {
|
|
1829
|
-
return isHTMLElement(node) && node.tagName?.toLowerCase() === name.toLowerCase();
|
|
1830
|
-
};
|
|
1831
|
-
const nodeTagIn = (node, names) => {
|
|
1832
|
-
return isHTMLElement(node) && names.includes(node.tagName?.toLowerCase());
|
|
1833
|
-
};
|
|
1834
|
-
const isTextCE = (ce) => {
|
|
1835
|
-
return ce?.type === 'text';
|
|
1836
|
-
};
|
|
1837
|
-
const decodeHTMLEntities = (str) => htmlEntities.decode(str);
|
|
1838
|
-
const htmlToText = (html, parseOptions) => {
|
|
1839
|
-
if (!html)
|
|
1840
|
-
return '';
|
|
1841
|
-
const doc = nodeHtmlParser.parse(html, parseOptions);
|
|
1842
|
-
return decodeHTMLEntities(doc.innerText);
|
|
1843
|
-
};
|
|
1844
|
-
const getHTMLElementAttribute = (e, key) => {
|
|
1845
|
-
const value = e.getAttribute(key);
|
|
1846
|
-
if (value)
|
|
1847
|
-
return value;
|
|
1848
|
-
return new URLSearchParams(e.rawAttrs.replaceAll(' ', '&')).get(key);
|
|
2231
|
+
const reference = (ref) => {
|
|
2232
|
+
return {
|
|
2233
|
+
_id: ref.id,
|
|
2234
|
+
type: 'reference',
|
|
2235
|
+
referent: {
|
|
2236
|
+
...ref,
|
|
2237
|
+
},
|
|
2238
|
+
};
|
|
1849
2239
|
};
|
|
1850
2240
|
|
|
1851
|
-
var
|
|
2241
|
+
var ANS = /*#__PURE__*/Object.freeze({
|
|
1852
2242
|
__proto__: null,
|
|
1853
|
-
|
|
1854
|
-
getHTMLElementAttribute: getHTMLElementAttribute,
|
|
1855
|
-
htmlToText: htmlToText,
|
|
1856
|
-
isCommentNode: isCommentNode,
|
|
1857
|
-
isHTMLElement: isHTMLElement,
|
|
1858
|
-
isTextCE: isTextCE,
|
|
1859
|
-
isTextNode: isTextNode,
|
|
1860
|
-
nodeTagIn: nodeTagIn,
|
|
1861
|
-
nodeTagIs: nodeTagIs
|
|
2243
|
+
reference: reference
|
|
1862
2244
|
});
|
|
1863
2245
|
|
|
2246
|
+
const generateArcId = (identifier, orgHostname) => {
|
|
2247
|
+
const namespace = uuid.v5(orgHostname, uuid.v5.DNS);
|
|
2248
|
+
const buffer = uuid.v5(identifier, namespace, Buffer.alloc(16));
|
|
2249
|
+
return encode(buffer, 'RFC4648', { padding: false });
|
|
2250
|
+
};
|
|
1864
2251
|
/**
|
|
1865
|
-
*
|
|
1866
|
-
* It provides a flexible way to handle different HTML nodes and wrap text content.
|
|
1867
|
-
*
|
|
1868
|
-
* The processor can be extended with custom handlers for specific node types and
|
|
1869
|
-
* wrappers for text content.
|
|
2252
|
+
* Utility class for generating Arc IDs and source IDs
|
|
1870
2253
|
*
|
|
1871
2254
|
* @example
|
|
1872
2255
|
* ```ts
|
|
1873
|
-
*
|
|
1874
|
-
* const
|
|
1875
|
-
*
|
|
1876
|
-
*
|
|
1877
|
-
* // Parse HTML content
|
|
1878
|
-
* const html = '<div><p>Some text</p><img src="image.jpg"></div>';
|
|
1879
|
-
* const elements = await processor.parse(html);
|
|
2256
|
+
* const generator = new IdGenerator(['my-org']);
|
|
2257
|
+
* const arcId = generator.getArcId('123'); // Generates a unique for 'my-org' Arc ID
|
|
2258
|
+
* const sourceId = generator.getSourceId('123', ['my-site']); // Generates 'my-site-123'
|
|
1880
2259
|
* ```
|
|
1881
|
-
*
|
|
1882
|
-
* The processor comes with built-in handlers for common HTML elements like links,
|
|
1883
|
-
* text formatting (i, u, strong), and block elements. Custom handlers can be added
|
|
1884
|
-
* using the `handle()` and `wrap()` methods.
|
|
1885
2260
|
*/
|
|
1886
|
-
class
|
|
1887
|
-
constructor() {
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
node: new Map(),
|
|
1891
|
-
wrap: new Map(),
|
|
1892
|
-
};
|
|
1893
|
-
}
|
|
1894
|
-
init() {
|
|
1895
|
-
// wrappers are used to wrap the content of nested text nodes
|
|
1896
|
-
// in a specific way
|
|
1897
|
-
this.wrap('link', (node, text) => {
|
|
1898
|
-
if (nodeTagIn(node, ['a'])) {
|
|
1899
|
-
const attributes = ['href', 'target', 'rel']
|
|
1900
|
-
.map((attr) => [attr, getHTMLElementAttribute(node, attr)])
|
|
1901
|
-
.filter(([_, value]) => value)
|
|
1902
|
-
.map(([key, value]) => `${key}="${value}"`)
|
|
1903
|
-
.join(' ');
|
|
1904
|
-
return {
|
|
1905
|
-
...text,
|
|
1906
|
-
content: `<a ${attributes}>${text.content}</a>`,
|
|
1907
|
-
};
|
|
1908
|
-
}
|
|
1909
|
-
});
|
|
1910
|
-
this.wrap('i', (node, text) => {
|
|
1911
|
-
if (nodeTagIn(node, ['i'])) {
|
|
1912
|
-
return {
|
|
1913
|
-
...text,
|
|
1914
|
-
content: `<i>${text.content}</i>`,
|
|
1915
|
-
};
|
|
1916
|
-
}
|
|
1917
|
-
});
|
|
1918
|
-
this.wrap('u', (node, text) => {
|
|
1919
|
-
if (nodeTagIn(node, ['u'])) {
|
|
1920
|
-
return {
|
|
1921
|
-
...text,
|
|
1922
|
-
content: `<u>${text.content}</u>`,
|
|
1923
|
-
};
|
|
1924
|
-
}
|
|
1925
|
-
});
|
|
1926
|
-
this.wrap('sup/sub', (node, text) => {
|
|
1927
|
-
if (nodeTagIn(node, ['sup', 'sub'])) {
|
|
1928
|
-
return {
|
|
1929
|
-
...text,
|
|
1930
|
-
content: `<mark class="${node.tagName.toLowerCase()}">${text.content}</mark>`,
|
|
1931
|
-
};
|
|
1932
|
-
}
|
|
1933
|
-
});
|
|
1934
|
-
this.wrap('strong', (node, text) => {
|
|
1935
|
-
if (nodeTagIn(node, ['strong', 'b'])) {
|
|
1936
|
-
return {
|
|
1937
|
-
...text,
|
|
1938
|
-
content: `<b>${text.content}</b>`,
|
|
1939
|
-
};
|
|
1940
|
-
}
|
|
1941
|
-
});
|
|
1942
|
-
this.wrap('center', (node, text) => {
|
|
1943
|
-
if (nodeTagIn(node, ['center'])) {
|
|
1944
|
-
return {
|
|
1945
|
-
...text,
|
|
1946
|
-
alignment: 'center',
|
|
1947
|
-
};
|
|
1948
|
-
}
|
|
1949
|
-
});
|
|
1950
|
-
this.wrap('aligned-paragraph', (node, text) => {
|
|
1951
|
-
if (nodeTagIn(node, ['p'])) {
|
|
1952
|
-
const styleAttribute = getHTMLElementAttribute(node, 'style') || '';
|
|
1953
|
-
if (!styleAttribute)
|
|
1954
|
-
return text;
|
|
1955
|
-
if (styleAttribute.includes('text-align: right;')) {
|
|
1956
|
-
return {
|
|
1957
|
-
...text,
|
|
1958
|
-
alignment: 'right',
|
|
1959
|
-
};
|
|
1960
|
-
}
|
|
1961
|
-
if (styleAttribute.includes('text-align: left;')) {
|
|
1962
|
-
return {
|
|
1963
|
-
...text,
|
|
1964
|
-
alignment: 'left',
|
|
1965
|
-
};
|
|
1966
|
-
}
|
|
1967
|
-
if (styleAttribute.includes('text-align: center;')) {
|
|
1968
|
-
return {
|
|
1969
|
-
...text,
|
|
1970
|
-
alignment: 'center',
|
|
1971
|
-
};
|
|
1972
|
-
}
|
|
1973
|
-
return text;
|
|
1974
|
-
}
|
|
1975
|
-
});
|
|
1976
|
-
// handlers are used to handle specific nodes
|
|
1977
|
-
// and return a list of content elements
|
|
1978
|
-
this.handle('default', (node) => {
|
|
1979
|
-
const noTag = isHTMLElement(node) && !node.tagName;
|
|
1980
|
-
if (noTag ||
|
|
1981
|
-
nodeTagIn(node, [
|
|
1982
|
-
'p',
|
|
1983
|
-
'a',
|
|
1984
|
-
'b',
|
|
1985
|
-
'sup',
|
|
1986
|
-
'sub',
|
|
1987
|
-
'span',
|
|
1988
|
-
'strong',
|
|
1989
|
-
'em',
|
|
1990
|
-
'i',
|
|
1991
|
-
'u',
|
|
1992
|
-
'section',
|
|
1993
|
-
'main',
|
|
1994
|
-
'div',
|
|
1995
|
-
'li',
|
|
1996
|
-
'center',
|
|
1997
|
-
])) {
|
|
1998
|
-
return this.handleNested(node);
|
|
1999
|
-
}
|
|
2000
|
-
});
|
|
2001
|
-
this.handle('headers', (node) => {
|
|
2002
|
-
if (nodeTagIn(node, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
|
|
2003
|
-
return this.createHeader(node);
|
|
2004
|
-
}
|
|
2005
|
-
});
|
|
2006
|
-
this.handle('text', (node) => {
|
|
2007
|
-
if (isTextNode(node)) {
|
|
2008
|
-
return this.createText(node);
|
|
2009
|
-
}
|
|
2010
|
-
});
|
|
2011
|
-
this.handle('comment', (node) => {
|
|
2012
|
-
if (isCommentNode(node)) {
|
|
2013
|
-
return this.handleComment(node);
|
|
2014
|
-
}
|
|
2015
|
-
});
|
|
2016
|
-
this.handle('list', async (node) => {
|
|
2017
|
-
if (nodeTagIn(node, ['ul', 'ol'])) {
|
|
2018
|
-
const listType = node.tagName === 'UL' ? 'unordered' : 'ordered';
|
|
2019
|
-
return this.createList(node, listType);
|
|
2020
|
-
}
|
|
2021
|
-
});
|
|
2022
|
-
this.handle('table', (node) => {
|
|
2023
|
-
if (nodeTagIs(node, 'table')) {
|
|
2024
|
-
return this.handleTable(node);
|
|
2025
|
-
}
|
|
2026
|
-
});
|
|
2027
|
-
this.handle('iframe', (node) => {
|
|
2028
|
-
if (nodeTagIs(node, 'iframe')) {
|
|
2029
|
-
return this.handleIframe(node);
|
|
2030
|
-
}
|
|
2031
|
-
});
|
|
2032
|
-
this.handle('img', (node) => {
|
|
2033
|
-
if (nodeTagIs(node, 'img')) {
|
|
2034
|
-
return this.handleImage(node);
|
|
2035
|
-
}
|
|
2036
|
-
});
|
|
2037
|
-
this.handle('br', (node) => {
|
|
2038
|
-
if (nodeTagIs(node, 'br')) {
|
|
2039
|
-
return this.handleBreak(node);
|
|
2040
|
-
}
|
|
2041
|
-
});
|
|
2042
|
-
}
|
|
2043
|
-
handle(name, handler) {
|
|
2044
|
-
if (this.handlers.node.has(name)) {
|
|
2045
|
-
this.warn({ name }, `${name} node handler already set`);
|
|
2046
|
-
}
|
|
2047
|
-
this.handlers.node.set(name, handler);
|
|
2048
|
-
}
|
|
2049
|
-
wrap(name, handler) {
|
|
2050
|
-
if (this.handlers.wrap.has(name)) {
|
|
2051
|
-
this.warn({ name }, `${name} wrap handler already set`);
|
|
2261
|
+
class IdGenerator {
|
|
2262
|
+
constructor(namespaces) {
|
|
2263
|
+
if (!namespaces.length) {
|
|
2264
|
+
throw new Error('At least 1 namespace is required');
|
|
2052
2265
|
}
|
|
2053
|
-
this.
|
|
2266
|
+
this.namespace = namespaces.join('-');
|
|
2054
2267
|
}
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
doc.removeWhitespace();
|
|
2058
|
-
const elements = await this.process(doc);
|
|
2059
|
-
const filtered = elements?.filter((e) => e.type !== 'divider');
|
|
2060
|
-
return filtered || [];
|
|
2268
|
+
getArcId(id) {
|
|
2269
|
+
return generateArcId(id.toString(), this.namespace);
|
|
2061
2270
|
}
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
const parentNodeIsBlockElement = this.isBlockElement(parent);
|
|
2065
|
-
c.additional_properties = {
|
|
2066
|
-
...c.additional_properties,
|
|
2067
|
-
isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
|
|
2068
|
-
};
|
|
2069
|
-
return c;
|
|
2271
|
+
getSourceId(id, prefixes = []) {
|
|
2272
|
+
return [...prefixes, id].join('-');
|
|
2070
2273
|
}
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2274
|
+
}
|
|
2275
|
+
|
|
2276
|
+
var Id = /*#__PURE__*/Object.freeze({
|
|
2277
|
+
__proto__: null,
|
|
2278
|
+
IdGenerator: IdGenerator,
|
|
2279
|
+
generateArcId: generateArcId
|
|
2280
|
+
});
|
|
2281
|
+
|
|
2282
|
+
const buildTree = (items) => {
|
|
2283
|
+
const tree = [
|
|
2284
|
+
{
|
|
2285
|
+
id: '/',
|
|
2286
|
+
children: [],
|
|
2287
|
+
meta: new Proxy({}, {
|
|
2288
|
+
get: () => {
|
|
2289
|
+
throw new Error('Root node meta is not accessible');
|
|
2290
|
+
},
|
|
2291
|
+
}),
|
|
2292
|
+
parent: null,
|
|
2293
|
+
},
|
|
2294
|
+
];
|
|
2295
|
+
// Track nodes at each level to maintain parent-child relationships
|
|
2296
|
+
// stores last node at each level
|
|
2297
|
+
const currLevelNodes = {
|
|
2298
|
+
0: tree[0],
|
|
2299
|
+
};
|
|
2300
|
+
for (const item of items) {
|
|
2301
|
+
const node = {
|
|
2302
|
+
id: item.id,
|
|
2303
|
+
parent: null,
|
|
2304
|
+
children: [],
|
|
2305
|
+
meta: item,
|
|
2306
|
+
};
|
|
2307
|
+
// Determine the level of this node
|
|
2308
|
+
const levelKey = Object.keys(item).find((key) => key.startsWith('N') && item[key]);
|
|
2309
|
+
const level = Number(levelKey?.replace('N', '')) || 0;
|
|
2310
|
+
if (!level) {
|
|
2311
|
+
throw new Error(`Invalid level for section ${item.id}`);
|
|
2089
2312
|
}
|
|
2090
|
-
|
|
2313
|
+
// This is a child node - attach to its parent
|
|
2314
|
+
const parentLevel = level - 1;
|
|
2315
|
+
const parentNode = currLevelNodes[parentLevel];
|
|
2316
|
+
if (parentNode) {
|
|
2317
|
+
node.parent = parentNode;
|
|
2318
|
+
parentNode.children.push(node);
|
|
2319
|
+
}
|
|
2320
|
+
else {
|
|
2321
|
+
throw new Error(`Parent node not found for section ${item.id}`);
|
|
2322
|
+
}
|
|
2323
|
+
// Set this as the current node for its level
|
|
2324
|
+
currLevelNodes[level] = node;
|
|
2091
2325
|
}
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2099
|
-
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2326
|
+
// return root nodes children
|
|
2327
|
+
return tree[0].children;
|
|
2328
|
+
};
|
|
2329
|
+
const flattenTree = (tree) => {
|
|
2330
|
+
const flatten = [];
|
|
2331
|
+
const traverse = (node) => {
|
|
2332
|
+
flatten.push(node);
|
|
2333
|
+
for (const child of node.children) {
|
|
2334
|
+
traverse(child);
|
|
2335
|
+
}
|
|
2336
|
+
};
|
|
2337
|
+
// traverse all root nodes and their children
|
|
2338
|
+
for (const node of tree) {
|
|
2339
|
+
traverse(node);
|
|
2105
2340
|
}
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2341
|
+
return flatten;
|
|
2342
|
+
};
|
|
2343
|
+
const buildAndFlattenTree = (items) => flattenTree(buildTree(items));
|
|
2344
|
+
const groupByWebsites = (sections) => {
|
|
2345
|
+
return sections.reduce((acc, section) => {
|
|
2346
|
+
const website = section._website;
|
|
2347
|
+
if (!acc[website])
|
|
2348
|
+
acc[website] = [];
|
|
2349
|
+
acc[website].push(section);
|
|
2350
|
+
return acc;
|
|
2351
|
+
}, {});
|
|
2352
|
+
};
|
|
2353
|
+
const references = (sections) => {
|
|
2354
|
+
return sections.map((s) => reference({
|
|
2355
|
+
id: s._id,
|
|
2356
|
+
website: s._website,
|
|
2357
|
+
type: 'section',
|
|
2358
|
+
}));
|
|
2359
|
+
};
|
|
2360
|
+
const isReference = (section) => {
|
|
2361
|
+
return section?.type === 'reference' && section?.referent?.type === 'section';
|
|
2362
|
+
};
|
|
2363
|
+
const removeDuplicates = (sections) => {
|
|
2364
|
+
const map = new Map();
|
|
2365
|
+
sections.forEach((s) => {
|
|
2366
|
+
if (isReference(s)) {
|
|
2367
|
+
map.set(`${s.referent.id}${s.referent.website}`, s);
|
|
2109
2368
|
}
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
children.push(await this.process(child));
|
|
2369
|
+
else {
|
|
2370
|
+
map.set(`${s._id}${s._website}`, s);
|
|
2113
2371
|
}
|
|
2114
|
-
|
|
2372
|
+
});
|
|
2373
|
+
return [...map.values()];
|
|
2374
|
+
};
|
|
2375
|
+
class SectionsRepository {
|
|
2376
|
+
constructor(arc) {
|
|
2377
|
+
this.arc = arc;
|
|
2378
|
+
this.sectionsByWebsite = {};
|
|
2379
|
+
this.websitesAreLoaded = false;
|
|
2115
2380
|
}
|
|
2116
|
-
|
|
2117
|
-
|
|
2118
|
-
|
|
2119
|
-
|
|
2120
|
-
*
|
|
2121
|
-
* @param node - The HTML node to process
|
|
2122
|
-
* @returns Promise resolving to an array of content elements, or undefined if node cannot be processed
|
|
2123
|
-
*/
|
|
2124
|
-
async process(node) {
|
|
2125
|
-
let isKnownNode = false;
|
|
2126
|
-
const elements = [];
|
|
2127
|
-
for (const [name, handler] of this.handlers.node.entries()) {
|
|
2128
|
-
try {
|
|
2129
|
-
const result = await handler(node);
|
|
2130
|
-
if (result) {
|
|
2131
|
-
// if handler returns an array of elements, it means that the node was handled properly, even if there is no elements inside
|
|
2132
|
-
isKnownNode = true;
|
|
2133
|
-
elements.push(...result);
|
|
2134
|
-
break;
|
|
2135
|
-
}
|
|
2136
|
-
}
|
|
2137
|
-
catch (error) {
|
|
2138
|
-
this.warn({ node: node.toString(), error: error.toString(), name }, 'HandlerError');
|
|
2139
|
-
}
|
|
2140
|
-
}
|
|
2141
|
-
if (isKnownNode)
|
|
2142
|
-
return elements;
|
|
2143
|
-
this.warn({ node: node.toString() }, 'UnknownNodeError');
|
|
2381
|
+
async put(ans) {
|
|
2382
|
+
await this.arc.Site.putSection(ans);
|
|
2383
|
+
const created = await this.arc.Site.getSection(ans._id, ans.website);
|
|
2384
|
+
this.save(created);
|
|
2144
2385
|
}
|
|
2145
|
-
|
|
2146
|
-
|
|
2147
|
-
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
|
|
2155
|
-
|
|
2156
|
-
if (!toMerge.length)
|
|
2157
|
-
return;
|
|
2158
|
-
const paragraph = toMerge.reduce((acc, p) => {
|
|
2159
|
-
return {
|
|
2160
|
-
...p,
|
|
2161
|
-
content: acc.content + p.content,
|
|
2162
|
-
};
|
|
2163
|
-
}, { type: 'text', content: '' });
|
|
2164
|
-
merged.push(paragraph);
|
|
2165
|
-
toMerge = [];
|
|
2166
|
-
};
|
|
2167
|
-
for (let i = 0; i < items.length; i++) {
|
|
2168
|
-
const item = items[i];
|
|
2169
|
-
const isBlockElement = item.additional_properties?.isBlockElement;
|
|
2170
|
-
if (isTextCE(item) && !isBlockElement) {
|
|
2171
|
-
toMerge.push(item);
|
|
2386
|
+
async loadWebsite(website) {
|
|
2387
|
+
const sections = [];
|
|
2388
|
+
let next = true;
|
|
2389
|
+
let offset = 0;
|
|
2390
|
+
while (next) {
|
|
2391
|
+
const migrated = await this.arc.Site.getSections({ website, offset }).catch((_) => {
|
|
2392
|
+
return { q_results: [] };
|
|
2393
|
+
});
|
|
2394
|
+
if (migrated.q_results.length) {
|
|
2395
|
+
sections.push(...migrated.q_results);
|
|
2396
|
+
offset += migrated.q_results.length;
|
|
2172
2397
|
}
|
|
2173
2398
|
else {
|
|
2174
|
-
|
|
2175
|
-
merged.push(item);
|
|
2399
|
+
next = false;
|
|
2176
2400
|
}
|
|
2177
2401
|
}
|
|
2178
|
-
|
|
2179
|
-
return merged;
|
|
2180
|
-
}
|
|
2181
|
-
handleComment(_) {
|
|
2182
|
-
return [];
|
|
2183
|
-
}
|
|
2184
|
-
async handleTable(node) {
|
|
2185
|
-
return [ContentElement.raw_html(node.toString())];
|
|
2186
|
-
}
|
|
2187
|
-
async handleIframe(node) {
|
|
2188
|
-
return [ContentElement.raw_html(node.toString())];
|
|
2189
|
-
}
|
|
2190
|
-
async handleImage(node) {
|
|
2191
|
-
return [ContentElement.raw_html(node.toString())];
|
|
2192
|
-
}
|
|
2193
|
-
async handleBreak(_) {
|
|
2194
|
-
return [ContentElement.divider()];
|
|
2195
|
-
}
|
|
2196
|
-
async createQuote(node) {
|
|
2197
|
-
const items = await this.handleNested(node);
|
|
2198
|
-
return [ContentElement.quote(items)];
|
|
2402
|
+
return sections;
|
|
2199
2403
|
}
|
|
2200
|
-
async
|
|
2201
|
-
const
|
|
2202
|
-
|
|
2404
|
+
async loadWebsites(websites) {
|
|
2405
|
+
for (const website of websites) {
|
|
2406
|
+
this.sectionsByWebsite[website] = await this.loadWebsite(website);
|
|
2407
|
+
}
|
|
2408
|
+
this.websitesAreLoaded = true;
|
|
2203
2409
|
}
|
|
2204
|
-
|
|
2205
|
-
|
|
2410
|
+
save(section) {
|
|
2411
|
+
const website = section._website;
|
|
2412
|
+
assert.ok(website, 'Section must have a website');
|
|
2413
|
+
this.sectionsByWebsite[website] = this.sectionsByWebsite[website] || [];
|
|
2414
|
+
if (!this.sectionsByWebsite[website].find((s) => s._id === section._id)) {
|
|
2415
|
+
this.sectionsByWebsite[website].push(section);
|
|
2416
|
+
}
|
|
2206
2417
|
}
|
|
2207
|
-
|
|
2208
|
-
|
|
2209
|
-
|
|
2418
|
+
getById(id, website) {
|
|
2419
|
+
this.ensureWebsitesLoaded();
|
|
2420
|
+
const section = this.sectionsByWebsite[website]?.find((s) => s._id === id);
|
|
2421
|
+
return section;
|
|
2210
2422
|
}
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
return [
|
|
2423
|
+
getByWebsite(website) {
|
|
2424
|
+
this.ensureWebsitesLoaded();
|
|
2425
|
+
return this.sectionsByWebsite[website];
|
|
2214
2426
|
}
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
|
|
2427
|
+
getParentSections(section) {
|
|
2428
|
+
this.ensureWebsitesLoaded();
|
|
2429
|
+
const parents = [];
|
|
2430
|
+
let current = section;
|
|
2431
|
+
while (current.parent?.default && current.parent.default !== '/') {
|
|
2432
|
+
const parent = this.getById(current.parent.default, section._website);
|
|
2433
|
+
if (!parent)
|
|
2434
|
+
break;
|
|
2435
|
+
parents.push(parent);
|
|
2436
|
+
current = parent;
|
|
2437
|
+
}
|
|
2438
|
+
return parents;
|
|
2220
2439
|
}
|
|
2221
|
-
|
|
2222
|
-
|
|
2440
|
+
ensureWebsitesLoaded() {
|
|
2441
|
+
assert.ok(this.websitesAreLoaded, 'call .loadWebsites() first');
|
|
2223
2442
|
}
|
|
2224
2443
|
}
|
|
2225
2444
|
|
|
2226
|
-
var
|
|
2445
|
+
var Section = /*#__PURE__*/Object.freeze({
|
|
2227
2446
|
__proto__: null,
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2447
|
+
SectionsRepository: SectionsRepository,
|
|
2448
|
+
buildAndFlattenTree: buildAndFlattenTree,
|
|
2449
|
+
buildTree: buildTree,
|
|
2450
|
+
flattenTree: flattenTree,
|
|
2451
|
+
groupByWebsites: groupByWebsites,
|
|
2452
|
+
isReference: isReference,
|
|
2453
|
+
references: references,
|
|
2454
|
+
removeDuplicates: removeDuplicates
|
|
2231
2455
|
});
|
|
2232
2456
|
|
|
2233
|
-
|
|
2234
|
-
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2457
|
+
const ArcUtils = {
|
|
2458
|
+
Id,
|
|
2459
|
+
ANS,
|
|
2460
|
+
ContentElements,
|
|
2461
|
+
Section,
|
|
2462
|
+
};
|
|
2238
2463
|
|
|
2239
|
-
exports.AnsMapper = index$
|
|
2464
|
+
exports.AnsMapper = index$1;
|
|
2240
2465
|
exports.ArcAPI = ArcAPI;
|
|
2241
2466
|
exports.ArcError = ArcError;
|
|
2242
|
-
exports.ArcTypes = index
|
|
2467
|
+
exports.ArcTypes = index;
|
|
2243
2468
|
exports.ArcUtils = ArcUtils;
|
|
2244
|
-
exports.ContentElements = index;
|
|
2469
|
+
exports.ContentElements = index$2;
|
|
2245
2470
|
exports.WsClient = WsClient;
|
|
2246
2471
|
exports.default = ArcAPI;
|
|
2247
2472
|
//# sourceMappingURL=index.cjs.map
|