@code.store/arcxp-sdk-ts 5.2.0 → 5.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/author/index.d.ts +1 -1
- package/dist/api/content/index.d.ts +1 -1
- package/dist/api/content-ops/index.d.ts +1 -1
- package/dist/api/custom/index.d.ts +1 -1
- package/dist/api/developer-retail/index.d.ts +1 -1
- package/dist/api/draft/index.d.ts +1 -1
- package/dist/api/global-settings/index.d.ts +1 -1
- package/dist/api/identity/index.d.ts +1 -1
- package/dist/api/ifx/index.d.ts +1 -1
- package/dist/api/index.d.ts +2 -2
- package/dist/api/migration-center/index.d.ts +1 -1
- package/dist/api/photo-center/index.d.ts +1 -1
- package/dist/api/redirect/index.d.ts +2 -2
- package/dist/api/sales/index.d.ts +1 -1
- package/dist/api/signing-service/index.d.ts +1 -1
- package/dist/api/site/index.d.ts +1 -1
- package/dist/api/tags/index.d.ts +1 -1
- package/dist/api/websked/index.d.ts +1 -1
- package/dist/content-elements/html/html.utils.d.ts +0 -3
- package/dist/content-elements/index.d.ts +1 -0
- package/dist/content-elements/xml/index.d.ts +3 -0
- package/dist/content-elements/xml/xml.constants.d.ts +1 -0
- package/dist/content-elements/xml/xml.processor.d.ts +45 -0
- package/dist/content-elements/xml/xml.utils.d.ts +5 -0
- package/dist/index.cjs +1146 -921
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +4 -4
- package/dist/index.js +1143 -919
- package/dist/index.js.map +1 -1
- package/dist/types/index.d.ts +2 -2
- package/dist/utils/arc/content.d.ts +3 -1
- package/package.json +12 -5
package/dist/index.js
CHANGED
|
@@ -5,11 +5,12 @@ import fs from 'node:fs';
|
|
|
5
5
|
import path from 'node:path';
|
|
6
6
|
import FormData from 'form-data';
|
|
7
7
|
import * as ws from 'ws';
|
|
8
|
+
import { TextNode, HTMLElement, CommentNode, parse } from 'node-html-parser';
|
|
9
|
+
import { decode } from 'html-entities';
|
|
10
|
+
import * as xmldoc from 'xmldoc';
|
|
8
11
|
import encode from 'base32-encode';
|
|
9
12
|
import { v5 } from 'uuid';
|
|
10
13
|
import assert from 'node:assert';
|
|
11
|
-
import { TextNode, HTMLElement, CommentNode, parse } from 'node-html-parser';
|
|
12
|
-
import { decode } from 'html-entities';
|
|
13
14
|
|
|
14
15
|
const safeJSONStringify = (data) => {
|
|
15
16
|
try {
|
|
@@ -113,28 +114,6 @@ class ArcAuthor extends ArcAbstractAPI {
|
|
|
113
114
|
}
|
|
114
115
|
}
|
|
115
116
|
|
|
116
|
-
class ArcContentOps extends ArcAbstractAPI {
|
|
117
|
-
constructor(options) {
|
|
118
|
-
super({ ...options, apiPath: 'contentops/v1' });
|
|
119
|
-
}
|
|
120
|
-
async schedulePublish(payload) {
|
|
121
|
-
const { data } = await this.client.put('/publish', payload);
|
|
122
|
-
return data;
|
|
123
|
-
}
|
|
124
|
-
async scheduleUnpublish(payload) {
|
|
125
|
-
const { data } = await this.client.put('/unpublish', payload);
|
|
126
|
-
return data;
|
|
127
|
-
}
|
|
128
|
-
async unscheduleUnpublish(payload) {
|
|
129
|
-
const { data } = await this.client.put('/unschedule_unpublish', payload);
|
|
130
|
-
return data;
|
|
131
|
-
}
|
|
132
|
-
async unschedulePublish(payload) {
|
|
133
|
-
const { data } = await this.client.put('/unschedule_publish', payload);
|
|
134
|
-
return data;
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
|
|
138
117
|
class ArcContent extends ArcAbstractAPI {
|
|
139
118
|
constructor(options) {
|
|
140
119
|
super({ ...options, apiPath: 'content/v4' });
|
|
@@ -159,6 +138,28 @@ class ArcContent extends ArcAbstractAPI {
|
|
|
159
138
|
}
|
|
160
139
|
}
|
|
161
140
|
|
|
141
|
+
class ArcContentOps extends ArcAbstractAPI {
|
|
142
|
+
constructor(options) {
|
|
143
|
+
super({ ...options, apiPath: 'contentops/v1' });
|
|
144
|
+
}
|
|
145
|
+
async schedulePublish(payload) {
|
|
146
|
+
const { data } = await this.client.put('/publish', payload);
|
|
147
|
+
return data;
|
|
148
|
+
}
|
|
149
|
+
async scheduleUnpublish(payload) {
|
|
150
|
+
const { data } = await this.client.put('/unpublish', payload);
|
|
151
|
+
return data;
|
|
152
|
+
}
|
|
153
|
+
async unscheduleUnpublish(payload) {
|
|
154
|
+
const { data } = await this.client.put('/unschedule_unpublish', payload);
|
|
155
|
+
return data;
|
|
156
|
+
}
|
|
157
|
+
async unschedulePublish(payload) {
|
|
158
|
+
const { data } = await this.client.put('/unschedule_publish', payload);
|
|
159
|
+
return data;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
162
163
|
class Custom extends ArcAbstractAPI {
|
|
163
164
|
constructor(options) {
|
|
164
165
|
super({ ...options, apiPath: '' });
|
|
@@ -172,6 +173,128 @@ class Custom extends ArcAbstractAPI {
|
|
|
172
173
|
}
|
|
173
174
|
}
|
|
174
175
|
|
|
176
|
+
class ArcDeveloperRetail extends ArcAbstractAPI {
|
|
177
|
+
constructor(options) {
|
|
178
|
+
super({ ...options, apiPath: 'retail/api/v1' });
|
|
179
|
+
}
|
|
180
|
+
// ============================================
|
|
181
|
+
// Product Methods
|
|
182
|
+
// ============================================
|
|
183
|
+
async getProductById(id, params) {
|
|
184
|
+
const { data } = await this.client.get(`/product/${id}`, { params });
|
|
185
|
+
return data;
|
|
186
|
+
}
|
|
187
|
+
async getProductBySku(sku, params) {
|
|
188
|
+
const { data } = await this.client.get(`/product/sku/${sku}`, { params });
|
|
189
|
+
return data;
|
|
190
|
+
}
|
|
191
|
+
async getProductByPriceCode(priceCode, params) {
|
|
192
|
+
const { data } = await this.client.get(`/product/pricecode/${priceCode}`, { params });
|
|
193
|
+
return data;
|
|
194
|
+
}
|
|
195
|
+
async getAllProducts(params) {
|
|
196
|
+
const { data } = await this.client.get('/product', { params });
|
|
197
|
+
return data;
|
|
198
|
+
}
|
|
199
|
+
// ============================================
|
|
200
|
+
// Pricing Strategy Methods
|
|
201
|
+
// ============================================
|
|
202
|
+
async getPricingStrategyById(id, params) {
|
|
203
|
+
const { data } = await this.client.get(`/pricing/strategy/${id}`, { params });
|
|
204
|
+
return data;
|
|
205
|
+
}
|
|
206
|
+
async getAllPricingStrategies(params) {
|
|
207
|
+
const { data } = await this.client.get('/pricing/strategy', { params });
|
|
208
|
+
return data;
|
|
209
|
+
}
|
|
210
|
+
// ============================================
|
|
211
|
+
// Pricing Rate Methods
|
|
212
|
+
// ============================================
|
|
213
|
+
async getPricingRateById(id, params) {
|
|
214
|
+
const { data } = await this.client.get(`/pricing/rate/${id}`, { params });
|
|
215
|
+
return data;
|
|
216
|
+
}
|
|
217
|
+
async getAllPricingRates(params) {
|
|
218
|
+
const { data } = await this.client.get('/pricing/rate', { params });
|
|
219
|
+
return data;
|
|
220
|
+
}
|
|
221
|
+
// ============================================
|
|
222
|
+
// Pricing Cycle Methods
|
|
223
|
+
// ============================================
|
|
224
|
+
async getPricingCycle(priceCode, cycleIndex, startDate, params) {
|
|
225
|
+
const { data } = await this.client.get(`/pricing/cycle/${priceCode}/${cycleIndex}/${startDate}`, {
|
|
226
|
+
params,
|
|
227
|
+
});
|
|
228
|
+
return data;
|
|
229
|
+
}
|
|
230
|
+
// ============================================
|
|
231
|
+
// Campaign Methods
|
|
232
|
+
// ============================================
|
|
233
|
+
async getCampaignById(id, params) {
|
|
234
|
+
const { data } = await this.client.get(`/campaign/${id}`, { params });
|
|
235
|
+
return data;
|
|
236
|
+
}
|
|
237
|
+
async getCampaignByName(campaignName, params) {
|
|
238
|
+
const { data } = await this.client.get(`/campaign/${campaignName}/get`, { params });
|
|
239
|
+
return data;
|
|
240
|
+
}
|
|
241
|
+
async getAllCampaigns(params) {
|
|
242
|
+
const { data } = await this.client.get('/campaign', { params });
|
|
243
|
+
return data;
|
|
244
|
+
}
|
|
245
|
+
// ============================================
|
|
246
|
+
// Campaign Category Methods
|
|
247
|
+
// ============================================
|
|
248
|
+
async getCampaignCategoryById(id, params) {
|
|
249
|
+
const { data } = await this.client.get(`/campaign/category/${id}`, { params });
|
|
250
|
+
return data;
|
|
251
|
+
}
|
|
252
|
+
async getAllCampaignCategories(params) {
|
|
253
|
+
const { data } = await this.client.get('/campaign/category', { params });
|
|
254
|
+
return data;
|
|
255
|
+
}
|
|
256
|
+
// ============================================
|
|
257
|
+
// Offer Methods
|
|
258
|
+
// ============================================
|
|
259
|
+
async getOfferById(id, params) {
|
|
260
|
+
const { data } = await this.client.get(`/offer/${id}`, { params });
|
|
261
|
+
return data;
|
|
262
|
+
}
|
|
263
|
+
async getAllOffers(params) {
|
|
264
|
+
const { data } = await this.client.get('/offer', { params });
|
|
265
|
+
return data;
|
|
266
|
+
}
|
|
267
|
+
// ============================================
|
|
268
|
+
// Offer Attribute Methods
|
|
269
|
+
// ============================================
|
|
270
|
+
async getOfferAttributeById(id, params) {
|
|
271
|
+
const { data } = await this.client.get(`/offer/attribute/${id}`, { params });
|
|
272
|
+
return data;
|
|
273
|
+
}
|
|
274
|
+
async getAllOfferAttributes(params) {
|
|
275
|
+
const { data } = await this.client.get('/offer/attribute', { params });
|
|
276
|
+
return data;
|
|
277
|
+
}
|
|
278
|
+
// ============================================
|
|
279
|
+
// Product Attribute Methods
|
|
280
|
+
// ============================================
|
|
281
|
+
async getProductAttributeById(id, params) {
|
|
282
|
+
const { data } = await this.client.get(`/product/attribute/${id}`, { params });
|
|
283
|
+
return data;
|
|
284
|
+
}
|
|
285
|
+
async getAllProductAttributes(params) {
|
|
286
|
+
const { data } = await this.client.get('/product/attribute', { params });
|
|
287
|
+
return data;
|
|
288
|
+
}
|
|
289
|
+
// ============================================
|
|
290
|
+
// Condition Category Methods
|
|
291
|
+
// ============================================
|
|
292
|
+
async getAllConditionCategories(params) {
|
|
293
|
+
const { data } = await this.client.get('/condition/categories', { params });
|
|
294
|
+
return data;
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
|
|
175
298
|
class ArcDraft extends ArcAbstractAPI {
|
|
176
299
|
constructor(options) {
|
|
177
300
|
super({ ...options, apiPath: 'draft/v1' });
|
|
@@ -568,190 +691,68 @@ class ArcRetailEvents {
|
|
|
568
691
|
}
|
|
569
692
|
}
|
|
570
693
|
|
|
571
|
-
class
|
|
694
|
+
class ArcSales extends ArcAbstractAPI {
|
|
572
695
|
constructor(options) {
|
|
573
|
-
super({ ...options, apiPath: '
|
|
696
|
+
super({ ...options, apiPath: 'sales/api/v1' });
|
|
574
697
|
}
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
const { data } = await this.client.
|
|
698
|
+
async migrate(params, payload) {
|
|
699
|
+
const FormData = await platform.form_data();
|
|
700
|
+
const form = new FormData();
|
|
701
|
+
form.append('file', JSON.stringify(payload), { filename: 'subs.json', contentType: 'application/json' });
|
|
702
|
+
const { data } = await this.client.post('/migrate', form, {
|
|
703
|
+
params,
|
|
704
|
+
headers: {
|
|
705
|
+
...form.getHeaders(),
|
|
706
|
+
},
|
|
707
|
+
});
|
|
580
708
|
return data;
|
|
581
709
|
}
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
710
|
+
}
|
|
711
|
+
class ArcSalesV2 extends ArcAbstractAPI {
|
|
712
|
+
constructor(options) {
|
|
713
|
+
super({ ...options, apiPath: 'sales/api/v2' });
|
|
585
714
|
}
|
|
586
|
-
async
|
|
587
|
-
const { data } = await this.client.get(
|
|
715
|
+
async getEnterpriseGroups(params) {
|
|
716
|
+
const { data } = await this.client.get('/subscriptions/enterprise', {
|
|
717
|
+
params: {
|
|
718
|
+
'arc-site': params.site,
|
|
719
|
+
},
|
|
720
|
+
});
|
|
588
721
|
return data;
|
|
589
722
|
}
|
|
590
|
-
async
|
|
591
|
-
const { data } = await this.client.
|
|
723
|
+
async createEnterpriseGroup(params, payload) {
|
|
724
|
+
const { data } = await this.client.post('/subscriptions/enterprise', payload, {
|
|
725
|
+
params: {
|
|
726
|
+
'arc-site': params.site,
|
|
727
|
+
},
|
|
728
|
+
});
|
|
592
729
|
return data;
|
|
593
730
|
}
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
const { data } = await this.client.get(`/pricing/strategy/${id}`, { params });
|
|
731
|
+
async createNonce(website, enterpriseGroupId) {
|
|
732
|
+
const { data } = await this.client.get(`/subscriptions/enterprise/${enterpriseGroupId}`, {
|
|
733
|
+
params: { 'arc-site': website },
|
|
734
|
+
});
|
|
599
735
|
return data;
|
|
600
736
|
}
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
class ArcSigningService extends ArcAbstractAPI {
|
|
740
|
+
constructor(options) {
|
|
741
|
+
super({ ...options, apiPath: 'signing-service' });
|
|
604
742
|
}
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
// ============================================
|
|
608
|
-
async getPricingRateById(id, params) {
|
|
609
|
-
const { data } = await this.client.get(`/pricing/rate/${id}`, { params });
|
|
743
|
+
async sign(service, serviceVersion, imageId) {
|
|
744
|
+
const { data } = await this.client.get(`/v2/sign/${service}/${serviceVersion}?value=${encodeURI(imageId)}`);
|
|
610
745
|
return data;
|
|
611
746
|
}
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
class ArcSite extends ArcAbstractAPI {
|
|
750
|
+
constructor(options) {
|
|
751
|
+
super({ ...options, apiPath: 'site/v3' });
|
|
615
752
|
}
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
async getPricingCycle(priceCode, cycleIndex, startDate, params) {
|
|
620
|
-
const { data } = await this.client.get(`/pricing/cycle/${priceCode}/${cycleIndex}/${startDate}`, {
|
|
621
|
-
params,
|
|
622
|
-
});
|
|
623
|
-
return data;
|
|
624
|
-
}
|
|
625
|
-
// ============================================
|
|
626
|
-
// Campaign Methods
|
|
627
|
-
// ============================================
|
|
628
|
-
async getCampaignById(id, params) {
|
|
629
|
-
const { data } = await this.client.get(`/campaign/${id}`, { params });
|
|
630
|
-
return data;
|
|
631
|
-
}
|
|
632
|
-
async getCampaignByName(campaignName, params) {
|
|
633
|
-
const { data } = await this.client.get(`/campaign/${campaignName}/get`, { params });
|
|
634
|
-
return data;
|
|
635
|
-
}
|
|
636
|
-
async getAllCampaigns(params) {
|
|
637
|
-
const { data } = await this.client.get('/campaign', { params });
|
|
638
|
-
return data;
|
|
639
|
-
}
|
|
640
|
-
// ============================================
|
|
641
|
-
// Campaign Category Methods
|
|
642
|
-
// ============================================
|
|
643
|
-
async getCampaignCategoryById(id, params) {
|
|
644
|
-
const { data } = await this.client.get(`/campaign/category/${id}`, { params });
|
|
645
|
-
return data;
|
|
646
|
-
}
|
|
647
|
-
async getAllCampaignCategories(params) {
|
|
648
|
-
const { data } = await this.client.get('/campaign/category', { params });
|
|
649
|
-
return data;
|
|
650
|
-
}
|
|
651
|
-
// ============================================
|
|
652
|
-
// Offer Methods
|
|
653
|
-
// ============================================
|
|
654
|
-
async getOfferById(id, params) {
|
|
655
|
-
const { data } = await this.client.get(`/offer/${id}`, { params });
|
|
656
|
-
return data;
|
|
657
|
-
}
|
|
658
|
-
async getAllOffers(params) {
|
|
659
|
-
const { data } = await this.client.get('/offer', { params });
|
|
660
|
-
return data;
|
|
661
|
-
}
|
|
662
|
-
// ============================================
|
|
663
|
-
// Offer Attribute Methods
|
|
664
|
-
// ============================================
|
|
665
|
-
async getOfferAttributeById(id, params) {
|
|
666
|
-
const { data } = await this.client.get(`/offer/attribute/${id}`, { params });
|
|
667
|
-
return data;
|
|
668
|
-
}
|
|
669
|
-
async getAllOfferAttributes(params) {
|
|
670
|
-
const { data } = await this.client.get('/offer/attribute', { params });
|
|
671
|
-
return data;
|
|
672
|
-
}
|
|
673
|
-
// ============================================
|
|
674
|
-
// Product Attribute Methods
|
|
675
|
-
// ============================================
|
|
676
|
-
async getProductAttributeById(id, params) {
|
|
677
|
-
const { data } = await this.client.get(`/product/attribute/${id}`, { params });
|
|
678
|
-
return data;
|
|
679
|
-
}
|
|
680
|
-
async getAllProductAttributes(params) {
|
|
681
|
-
const { data } = await this.client.get('/product/attribute', { params });
|
|
682
|
-
return data;
|
|
683
|
-
}
|
|
684
|
-
// ============================================
|
|
685
|
-
// Condition Category Methods
|
|
686
|
-
// ============================================
|
|
687
|
-
async getAllConditionCategories(params) {
|
|
688
|
-
const { data } = await this.client.get('/condition/categories', { params });
|
|
689
|
-
return data;
|
|
690
|
-
}
|
|
691
|
-
}
|
|
692
|
-
|
|
693
|
-
class ArcSales extends ArcAbstractAPI {
|
|
694
|
-
constructor(options) {
|
|
695
|
-
super({ ...options, apiPath: 'sales/api/v1' });
|
|
696
|
-
}
|
|
697
|
-
async migrate(params, payload) {
|
|
698
|
-
const FormData = await platform.form_data();
|
|
699
|
-
const form = new FormData();
|
|
700
|
-
form.append('file', JSON.stringify(payload), { filename: 'subs.json', contentType: 'application/json' });
|
|
701
|
-
const { data } = await this.client.post('/migrate', form, {
|
|
702
|
-
params,
|
|
703
|
-
headers: {
|
|
704
|
-
...form.getHeaders(),
|
|
705
|
-
},
|
|
706
|
-
});
|
|
707
|
-
return data;
|
|
708
|
-
}
|
|
709
|
-
}
|
|
710
|
-
class ArcSalesV2 extends ArcAbstractAPI {
|
|
711
|
-
constructor(options) {
|
|
712
|
-
super({ ...options, apiPath: 'sales/api/v2' });
|
|
713
|
-
}
|
|
714
|
-
async getEnterpriseGroups(params) {
|
|
715
|
-
const { data } = await this.client.get('/subscriptions/enterprise', {
|
|
716
|
-
params: {
|
|
717
|
-
'arc-site': params.site,
|
|
718
|
-
},
|
|
719
|
-
});
|
|
720
|
-
return data;
|
|
721
|
-
}
|
|
722
|
-
async createEnterpriseGroup(params, payload) {
|
|
723
|
-
const { data } = await this.client.post('/subscriptions/enterprise', payload, {
|
|
724
|
-
params: {
|
|
725
|
-
'arc-site': params.site,
|
|
726
|
-
},
|
|
727
|
-
});
|
|
728
|
-
return data;
|
|
729
|
-
}
|
|
730
|
-
async createNonce(website, enterpriseGroupId) {
|
|
731
|
-
const { data } = await this.client.get(`/subscriptions/enterprise/${enterpriseGroupId}`, {
|
|
732
|
-
params: { 'arc-site': website },
|
|
733
|
-
});
|
|
734
|
-
return data;
|
|
735
|
-
}
|
|
736
|
-
}
|
|
737
|
-
|
|
738
|
-
class ArcSigningService extends ArcAbstractAPI {
|
|
739
|
-
constructor(options) {
|
|
740
|
-
super({ ...options, apiPath: 'signing-service' });
|
|
741
|
-
}
|
|
742
|
-
async sign(service, serviceVersion, imageId) {
|
|
743
|
-
const { data } = await this.client.get(`/v2/sign/${service}/${serviceVersion}?value=${encodeURI(imageId)}`);
|
|
744
|
-
return data;
|
|
745
|
-
}
|
|
746
|
-
}
|
|
747
|
-
|
|
748
|
-
class ArcSite extends ArcAbstractAPI {
|
|
749
|
-
constructor(options) {
|
|
750
|
-
super({ ...options, apiPath: 'site/v3' });
|
|
751
|
-
}
|
|
752
|
-
async getSections(params) {
|
|
753
|
-
const { data } = await this.client.get(`/website/${params.website}/section`, {
|
|
754
|
-
params: { _website: params.website, ...params },
|
|
753
|
+
async getSections(params) {
|
|
754
|
+
const { data } = await this.client.get(`/website/${params.website}/section`, {
|
|
755
|
+
params: { _website: params.website, ...params },
|
|
755
756
|
});
|
|
756
757
|
return data;
|
|
757
758
|
}
|
|
@@ -889,85 +890,7 @@ const ArcAPI = (options) => {
|
|
|
889
890
|
return API;
|
|
890
891
|
};
|
|
891
892
|
|
|
892
|
-
|
|
893
|
-
/**
|
|
894
|
-
* This file was automatically generated by json-schema-to-typescript.
|
|
895
|
-
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
|
|
896
|
-
* and run json-schema-to-typescript to regenerate this file.
|
|
897
|
-
*/
|
|
898
|
-
|
|
899
|
-
var ansTypes = /*#__PURE__*/Object.freeze({
|
|
900
|
-
__proto__: null
|
|
901
|
-
});
|
|
902
|
-
|
|
903
|
-
var utils = /*#__PURE__*/Object.freeze({
|
|
904
|
-
__proto__: null
|
|
905
|
-
});
|
|
906
|
-
|
|
907
|
-
var ANSType;
|
|
908
|
-
(function (ANSType) {
|
|
909
|
-
ANSType["Story"] = "story";
|
|
910
|
-
ANSType["Video"] = "video";
|
|
911
|
-
ANSType["Tag"] = "tag";
|
|
912
|
-
ANSType["Author"] = "author";
|
|
913
|
-
ANSType["Gallery"] = "gallery";
|
|
914
|
-
ANSType["Image"] = "image";
|
|
915
|
-
ANSType["Redirect"] = "redirect";
|
|
916
|
-
})(ANSType || (ANSType = {}));
|
|
917
|
-
var MigrationStatus;
|
|
918
|
-
(function (MigrationStatus) {
|
|
919
|
-
MigrationStatus["Success"] = "Success";
|
|
920
|
-
MigrationStatus["Queued"] = "Queued";
|
|
921
|
-
MigrationStatus["Circulated"] = "Circulated";
|
|
922
|
-
MigrationStatus["Published"] = "Published";
|
|
923
|
-
MigrationStatus["Scheduled"] = "Scheduled";
|
|
924
|
-
MigrationStatus["FailVideo"] = "FailVideo";
|
|
925
|
-
MigrationStatus["FailImage"] = "FailImage";
|
|
926
|
-
MigrationStatus["FailPhoto"] = "FailPhoto";
|
|
927
|
-
MigrationStatus["FailStory"] = "FailStory";
|
|
928
|
-
MigrationStatus["FailGallery"] = "FailGallery";
|
|
929
|
-
MigrationStatus["FailAuthor"] = "FailAuthor";
|
|
930
|
-
MigrationStatus["FailTag"] = "FailTag";
|
|
931
|
-
MigrationStatus["ValidationFailed"] = "ValidationFailed";
|
|
932
|
-
})(MigrationStatus || (MigrationStatus = {}));
|
|
933
|
-
var SummarySortBy;
|
|
934
|
-
(function (SummarySortBy) {
|
|
935
|
-
SummarySortBy["CreateDate"] = "createDate";
|
|
936
|
-
SummarySortBy["UpdateDate"] = "updateDate";
|
|
937
|
-
SummarySortBy["Id"] = "id";
|
|
938
|
-
})(SummarySortBy || (SummarySortBy = {}));
|
|
939
|
-
var SummarySortOrder;
|
|
940
|
-
(function (SummarySortOrder) {
|
|
941
|
-
SummarySortOrder["ASC"] = "ASC";
|
|
942
|
-
SummarySortOrder["DESC"] = "DESC";
|
|
943
|
-
})(SummarySortOrder || (SummarySortOrder = {}));
|
|
944
|
-
|
|
945
|
-
var index$3 = /*#__PURE__*/Object.freeze({
|
|
946
|
-
__proto__: null,
|
|
947
|
-
ANS: ansTypes,
|
|
948
|
-
get ANSType () { return ANSType; },
|
|
949
|
-
get MigrationStatus () { return MigrationStatus; },
|
|
950
|
-
get SummarySortBy () { return SummarySortBy; },
|
|
951
|
-
get SummarySortOrder () { return SummarySortOrder; },
|
|
952
|
-
TypeUtils: utils
|
|
953
|
-
});
|
|
954
|
-
|
|
955
|
-
const reference = (ref) => {
|
|
956
|
-
return {
|
|
957
|
-
_id: ref.id,
|
|
958
|
-
type: 'reference',
|
|
959
|
-
referent: {
|
|
960
|
-
...ref,
|
|
961
|
-
},
|
|
962
|
-
};
|
|
963
|
-
};
|
|
964
|
-
|
|
965
|
-
var ANS = /*#__PURE__*/Object.freeze({
|
|
966
|
-
__proto__: null,
|
|
967
|
-
reference: reference
|
|
968
|
-
});
|
|
969
|
-
|
|
970
|
-
const ContentElement = {
|
|
893
|
+
const ContentElement$1 = {
|
|
971
894
|
divider: () => {
|
|
972
895
|
return {
|
|
973
896
|
type: 'divider',
|
|
@@ -1199,18 +1122,60 @@ const ContentElement = {
|
|
|
1199
1122
|
},
|
|
1200
1123
|
};
|
|
1201
1124
|
|
|
1125
|
+
const BLOCK_ELEMENT_TAGS$1 = [
|
|
1126
|
+
'ADDRESS',
|
|
1127
|
+
'ARTICLE',
|
|
1128
|
+
'ASIDE',
|
|
1129
|
+
'BLOCKQUOTE',
|
|
1130
|
+
'DETAILS',
|
|
1131
|
+
'DIV',
|
|
1132
|
+
'DL',
|
|
1133
|
+
'FIELDSET',
|
|
1134
|
+
'FIGCAPTION',
|
|
1135
|
+
'FIGURE',
|
|
1136
|
+
'FOOTER',
|
|
1137
|
+
'FORM',
|
|
1138
|
+
'H1',
|
|
1139
|
+
'H2',
|
|
1140
|
+
'H3',
|
|
1141
|
+
'H4',
|
|
1142
|
+
'H5',
|
|
1143
|
+
'H6',
|
|
1144
|
+
'HEADER',
|
|
1145
|
+
'HR',
|
|
1146
|
+
'LINE',
|
|
1147
|
+
'MAIN',
|
|
1148
|
+
'MENU',
|
|
1149
|
+
'NAV',
|
|
1150
|
+
'OL',
|
|
1151
|
+
'P',
|
|
1152
|
+
'PARAGRAPH',
|
|
1153
|
+
'PRE',
|
|
1154
|
+
'SECTION',
|
|
1155
|
+
'TABLE',
|
|
1156
|
+
'UL',
|
|
1157
|
+
'LI',
|
|
1158
|
+
'BODY',
|
|
1159
|
+
'HTML',
|
|
1160
|
+
];
|
|
1161
|
+
|
|
1162
|
+
var html_constants = /*#__PURE__*/Object.freeze({
|
|
1163
|
+
__proto__: null,
|
|
1164
|
+
BLOCK_ELEMENT_TAGS: BLOCK_ELEMENT_TAGS$1
|
|
1165
|
+
});
|
|
1166
|
+
|
|
1202
1167
|
const socialRegExps = {
|
|
1203
|
-
instagram: /(?:https?:\/\/)?(?:www.)?instagram.com\/?([a-zA-Z0-9
|
|
1204
|
-
twitter: /https:\/\/(?:www\.)?twitter\.com\/[
|
|
1205
|
-
tiktok: /https:\/\/(?:m|www|vm)?\.?tiktok\.com\/((?:.*\b(?:(?:usr|v|embed|user|video)\/|\?shareId
|
|
1206
|
-
facebookPost: /https:\/\/www\.facebook\.com\/(photo(\.php|s)|permalink\.php|media|questions|notes|[
|
|
1207
|
-
facebookVideo: /https:\/\/www\.facebook\.com\/([
|
|
1168
|
+
instagram: /(?:https?:\/\/)?(?:www.)?instagram.com\/?([a-zA-Z0-9._-]+)?\/([p]+)?([reel]+)?([tv]+)?([stories]+)?\/([a-zA-Z0-9\-_.]+)\/?([0-9]+)?/,
|
|
1169
|
+
twitter: /https:\/\/(?:www\.)?twitter\.com\/[^/]+\/status(?:es)?\/(\d+)/,
|
|
1170
|
+
tiktok: /https:\/\/(?:m|www|vm)?\.?tiktok\.com\/((?:.*\b(?:(?:usr|v|embed|user|video)\/|\?shareId=|&item_id=)(\d+))|\w+)/,
|
|
1171
|
+
facebookPost: /https:\/\/www\.facebook\.com\/(photo(\.php|s)|permalink\.php|media|questions|notes|[^/]+\/(activity|posts))[/?].*$/,
|
|
1172
|
+
facebookVideo: /https:\/\/www\.facebook\.com\/([^/?].+\/)?video(s|\.php)[/?].*/,
|
|
1208
1173
|
};
|
|
1209
1174
|
function match(url, regex) {
|
|
1210
1175
|
return url.match(regex)?.[0];
|
|
1211
1176
|
}
|
|
1212
1177
|
function youtubeURLParser(url = '') {
|
|
1213
|
-
const regExp = /(?:youtube(?:-nocookie)?\.com\/(?:[
|
|
1178
|
+
const regExp = /(?:youtube(?:-nocookie)?\.com\/(?:[^/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]vi?=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/;
|
|
1214
1179
|
const id = url?.match(regExp)?.[1];
|
|
1215
1180
|
if (id) {
|
|
1216
1181
|
return `https://youtu.be/${id}`;
|
|
@@ -1240,273 +1205,698 @@ function createSocial(url = '') {
|
|
|
1240
1205
|
const embeds = [];
|
|
1241
1206
|
const instagram = instagramURLParser(url);
|
|
1242
1207
|
if (instagram) {
|
|
1243
|
-
embeds.push(ContentElement.instagram(instagram));
|
|
1208
|
+
embeds.push(ContentElement$1.instagram(instagram));
|
|
1244
1209
|
}
|
|
1245
1210
|
const twitter = twitterURLParser(url);
|
|
1246
1211
|
if (twitter) {
|
|
1247
|
-
embeds.push(ContentElement.twitter(twitter));
|
|
1212
|
+
embeds.push(ContentElement$1.twitter(twitter));
|
|
1248
1213
|
}
|
|
1249
1214
|
const tiktok = tiktokURLParser(url);
|
|
1250
1215
|
if (tiktok) {
|
|
1251
|
-
embeds.push(ContentElement.tiktok(tiktok));
|
|
1216
|
+
embeds.push(ContentElement$1.tiktok(tiktok));
|
|
1252
1217
|
}
|
|
1253
1218
|
const youtube = youtubeURLParser(url);
|
|
1254
1219
|
if (youtube) {
|
|
1255
|
-
embeds.push(ContentElement.youtube(youtube));
|
|
1220
|
+
embeds.push(ContentElement$1.youtube(youtube));
|
|
1256
1221
|
}
|
|
1257
1222
|
const facebookPost = facebookPostURLParser(url);
|
|
1258
1223
|
if (facebookPost) {
|
|
1259
|
-
embeds.push(ContentElement.facebook_post(facebookPost));
|
|
1224
|
+
embeds.push(ContentElement$1.facebook_post(facebookPost));
|
|
1260
1225
|
}
|
|
1261
1226
|
const facebookVideo = facebookVideoURLParser(url);
|
|
1262
1227
|
if (facebookVideo) {
|
|
1263
|
-
embeds.push(ContentElement.facebook_video(facebookVideo));
|
|
1228
|
+
embeds.push(ContentElement$1.facebook_video(facebookVideo));
|
|
1264
1229
|
}
|
|
1265
1230
|
return embeds;
|
|
1266
1231
|
}
|
|
1267
1232
|
const randomId = () => `${new Date().toISOString()}-${Math.random()}`;
|
|
1233
|
+
const isTextCE = (ce) => {
|
|
1234
|
+
return ce?.type === 'text';
|
|
1235
|
+
};
|
|
1236
|
+
const decodeHTMLEntities = (str) => decode(str);
|
|
1268
1237
|
|
|
1269
1238
|
var ContentElements = /*#__PURE__*/Object.freeze({
|
|
1270
1239
|
__proto__: null,
|
|
1271
1240
|
createSocial: createSocial,
|
|
1241
|
+
decodeHTMLEntities: decodeHTMLEntities,
|
|
1272
1242
|
facebookPostURLParser: facebookPostURLParser,
|
|
1273
1243
|
facebookVideoURLParser: facebookVideoURLParser,
|
|
1274
1244
|
instagramURLParser: instagramURLParser,
|
|
1245
|
+
isTextCE: isTextCE,
|
|
1275
1246
|
randomId: randomId,
|
|
1276
1247
|
tiktokURLParser: tiktokURLParser,
|
|
1277
1248
|
twitterURLParser: twitterURLParser,
|
|
1278
1249
|
youtubeURLParser: youtubeURLParser
|
|
1279
1250
|
});
|
|
1280
1251
|
|
|
1281
|
-
const
|
|
1282
|
-
|
|
1283
|
-
const buffer = v5(identifier, namespace, Buffer.alloc(16));
|
|
1284
|
-
return encode(buffer, 'RFC4648', { padding: false });
|
|
1252
|
+
const isTextNode$1 = (node) => {
|
|
1253
|
+
return node instanceof TextNode;
|
|
1285
1254
|
};
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
*
|
|
1289
|
-
* @example
|
|
1290
|
-
* ```ts
|
|
1291
|
-
* const generator = new IdGenerator(['my-org']);
|
|
1292
|
-
* const arcId = generator.getArcId('123'); // Generates a unique for 'my-org' Arc ID
|
|
1293
|
-
* const sourceId = generator.getSourceId('123', ['my-site']); // Generates 'my-site-123'
|
|
1294
|
-
* ```
|
|
1295
|
-
*/
|
|
1296
|
-
class IdGenerator {
|
|
1297
|
-
constructor(namespaces) {
|
|
1298
|
-
if (!namespaces.length) {
|
|
1299
|
-
throw new Error('At least 1 namespace is required');
|
|
1300
|
-
}
|
|
1301
|
-
this.namespace = namespaces.join('-');
|
|
1302
|
-
}
|
|
1303
|
-
getArcId(id) {
|
|
1304
|
-
return generateArcId(id.toString(), this.namespace);
|
|
1305
|
-
}
|
|
1306
|
-
getSourceId(id, prefixes = []) {
|
|
1307
|
-
return [...prefixes, id].join('-');
|
|
1308
|
-
}
|
|
1309
|
-
}
|
|
1310
|
-
|
|
1311
|
-
var Id = /*#__PURE__*/Object.freeze({
|
|
1312
|
-
__proto__: null,
|
|
1313
|
-
IdGenerator: IdGenerator,
|
|
1314
|
-
generateArcId: generateArcId
|
|
1315
|
-
});
|
|
1316
|
-
|
|
1317
|
-
const buildTree = (items) => {
|
|
1318
|
-
const tree = [
|
|
1319
|
-
{
|
|
1320
|
-
id: '/',
|
|
1321
|
-
children: [],
|
|
1322
|
-
meta: new Proxy({}, {
|
|
1323
|
-
get: () => {
|
|
1324
|
-
throw new Error('Root node meta is not accessible');
|
|
1325
|
-
},
|
|
1326
|
-
}),
|
|
1327
|
-
parent: null,
|
|
1328
|
-
},
|
|
1329
|
-
];
|
|
1330
|
-
// Track nodes at each level to maintain parent-child relationships
|
|
1331
|
-
// stores last node at each level
|
|
1332
|
-
const currLevelNodes = {
|
|
1333
|
-
0: tree[0],
|
|
1334
|
-
};
|
|
1335
|
-
for (const item of items) {
|
|
1336
|
-
const node = {
|
|
1337
|
-
id: item.id,
|
|
1338
|
-
parent: null,
|
|
1339
|
-
children: [],
|
|
1340
|
-
meta: item,
|
|
1341
|
-
};
|
|
1342
|
-
// Determine the level of this node
|
|
1343
|
-
const levelKey = Object.keys(item).find((key) => key.startsWith('N') && item[key]);
|
|
1344
|
-
const level = Number(levelKey?.replace('N', '')) || 0;
|
|
1345
|
-
if (!level) {
|
|
1346
|
-
throw new Error(`Invalid level for section ${item.id}`);
|
|
1347
|
-
}
|
|
1348
|
-
// This is a child node - attach to its parent
|
|
1349
|
-
const parentLevel = level - 1;
|
|
1350
|
-
const parentNode = currLevelNodes[parentLevel];
|
|
1351
|
-
if (parentNode) {
|
|
1352
|
-
node.parent = parentNode;
|
|
1353
|
-
parentNode.children.push(node);
|
|
1354
|
-
}
|
|
1355
|
-
else {
|
|
1356
|
-
throw new Error(`Parent node not found for section ${item.id}`);
|
|
1357
|
-
}
|
|
1358
|
-
// Set this as the current node for its level
|
|
1359
|
-
currLevelNodes[level] = node;
|
|
1360
|
-
}
|
|
1361
|
-
// return root nodes children
|
|
1362
|
-
return tree[0].children;
|
|
1255
|
+
const isHTMLElement = (node) => {
|
|
1256
|
+
return node instanceof HTMLElement;
|
|
1363
1257
|
};
|
|
1364
|
-
const
|
|
1365
|
-
|
|
1366
|
-
const traverse = (node) => {
|
|
1367
|
-
flatten.push(node);
|
|
1368
|
-
for (const child of node.children) {
|
|
1369
|
-
traverse(child);
|
|
1370
|
-
}
|
|
1371
|
-
};
|
|
1372
|
-
// traverse all root nodes and their children
|
|
1373
|
-
for (const node of tree) {
|
|
1374
|
-
traverse(node);
|
|
1375
|
-
}
|
|
1376
|
-
return flatten;
|
|
1258
|
+
const isCommentNode = (node) => {
|
|
1259
|
+
return node instanceof CommentNode;
|
|
1377
1260
|
};
|
|
1378
|
-
const
|
|
1379
|
-
|
|
1380
|
-
return sections.reduce((acc, section) => {
|
|
1381
|
-
const website = section._website;
|
|
1382
|
-
if (!acc[website])
|
|
1383
|
-
acc[website] = [];
|
|
1384
|
-
acc[website].push(section);
|
|
1385
|
-
return acc;
|
|
1386
|
-
}, {});
|
|
1261
|
+
const nodeTagIs = (node, name) => {
|
|
1262
|
+
return isHTMLElement(node) && node.tagName?.toLowerCase() === name.toLowerCase();
|
|
1387
1263
|
};
|
|
1388
|
-
const
|
|
1389
|
-
return
|
|
1390
|
-
id: s._id,
|
|
1391
|
-
website: s._website,
|
|
1392
|
-
type: 'section',
|
|
1393
|
-
}));
|
|
1264
|
+
const nodeTagIn = (node, names) => {
|
|
1265
|
+
return isHTMLElement(node) && names.includes(node.tagName?.toLowerCase());
|
|
1394
1266
|
};
|
|
1395
|
-
const
|
|
1396
|
-
|
|
1267
|
+
const htmlToText = (html, parseOptions) => {
|
|
1268
|
+
if (!html)
|
|
1269
|
+
return '';
|
|
1270
|
+
const doc = parse(html, parseOptions);
|
|
1271
|
+
return decodeHTMLEntities(doc.innerText);
|
|
1397
1272
|
};
|
|
1398
|
-
const
|
|
1399
|
-
const
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
}
|
|
1404
|
-
else {
|
|
1405
|
-
map.set(`${s._id}${s._website}`, s);
|
|
1406
|
-
}
|
|
1407
|
-
});
|
|
1408
|
-
return [...map.values()];
|
|
1273
|
+
const getHTMLElementAttribute = (e, key) => {
|
|
1274
|
+
const value = e.getAttribute(key);
|
|
1275
|
+
if (value)
|
|
1276
|
+
return value;
|
|
1277
|
+
return new URLSearchParams(e.rawAttrs.replaceAll(' ', '&')).get(key);
|
|
1409
1278
|
};
|
|
1410
|
-
class SectionsRepository {
|
|
1411
|
-
constructor(arc) {
|
|
1412
|
-
this.arc = arc;
|
|
1413
|
-
this.sectionsByWebsite = {};
|
|
1414
|
-
this.websitesAreLoaded = false;
|
|
1415
|
-
}
|
|
1416
|
-
async put(ans) {
|
|
1417
|
-
await this.arc.Site.putSection(ans);
|
|
1418
|
-
const created = await this.arc.Site.getSection(ans._id, ans.website);
|
|
1419
|
-
this.save(created);
|
|
1420
|
-
}
|
|
1421
|
-
async loadWebsite(website) {
|
|
1422
|
-
const sections = [];
|
|
1423
|
-
let next = true;
|
|
1424
|
-
let offset = 0;
|
|
1425
|
-
while (next) {
|
|
1426
|
-
const migrated = await this.arc.Site.getSections({ website, offset }).catch((_) => {
|
|
1427
|
-
return { q_results: [] };
|
|
1428
|
-
});
|
|
1429
|
-
if (migrated.q_results.length) {
|
|
1430
|
-
sections.push(...migrated.q_results);
|
|
1431
|
-
offset += migrated.q_results.length;
|
|
1432
|
-
}
|
|
1433
|
-
else {
|
|
1434
|
-
next = false;
|
|
1435
|
-
}
|
|
1436
|
-
}
|
|
1437
|
-
return sections;
|
|
1438
|
-
}
|
|
1439
|
-
async loadWebsites(websites) {
|
|
1440
|
-
for (const website of websites) {
|
|
1441
|
-
this.sectionsByWebsite[website] = await this.loadWebsite(website);
|
|
1442
|
-
}
|
|
1443
|
-
this.websitesAreLoaded = true;
|
|
1444
|
-
}
|
|
1445
|
-
save(section) {
|
|
1446
|
-
const website = section._website;
|
|
1447
|
-
assert.ok(website, 'Section must have a website');
|
|
1448
|
-
this.sectionsByWebsite[website] = this.sectionsByWebsite[website] || [];
|
|
1449
|
-
if (!this.sectionsByWebsite[website].find((s) => s._id === section._id)) {
|
|
1450
|
-
this.sectionsByWebsite[website].push(section);
|
|
1451
|
-
}
|
|
1452
|
-
}
|
|
1453
|
-
getById(id, website) {
|
|
1454
|
-
this.ensureWebsitesLoaded();
|
|
1455
|
-
const section = this.sectionsByWebsite[website]?.find((s) => s._id === id);
|
|
1456
|
-
return section;
|
|
1457
|
-
}
|
|
1458
|
-
getByWebsite(website) {
|
|
1459
|
-
this.ensureWebsitesLoaded();
|
|
1460
|
-
return this.sectionsByWebsite[website];
|
|
1461
|
-
}
|
|
1462
|
-
getParentSections(section) {
|
|
1463
|
-
this.ensureWebsitesLoaded();
|
|
1464
|
-
const parents = [];
|
|
1465
|
-
let current = section;
|
|
1466
|
-
while (current.parent?.default && current.parent.default !== '/') {
|
|
1467
|
-
const parent = this.getById(current.parent.default, section._website);
|
|
1468
|
-
if (!parent)
|
|
1469
|
-
break;
|
|
1470
|
-
parents.push(parent);
|
|
1471
|
-
current = parent;
|
|
1472
|
-
}
|
|
1473
|
-
return parents;
|
|
1474
|
-
}
|
|
1475
|
-
ensureWebsitesLoaded() {
|
|
1476
|
-
assert.ok(this.websitesAreLoaded, 'call .loadWebsites() first');
|
|
1477
|
-
}
|
|
1478
|
-
}
|
|
1479
1279
|
|
|
1480
|
-
var
|
|
1280
|
+
var html_utils = /*#__PURE__*/Object.freeze({
|
|
1481
1281
|
__proto__: null,
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
removeDuplicates: removeDuplicates
|
|
1282
|
+
getHTMLElementAttribute: getHTMLElementAttribute,
|
|
1283
|
+
htmlToText: htmlToText,
|
|
1284
|
+
isCommentNode: isCommentNode,
|
|
1285
|
+
isHTMLElement: isHTMLElement,
|
|
1286
|
+
isTextNode: isTextNode$1,
|
|
1287
|
+
nodeTagIn: nodeTagIn,
|
|
1288
|
+
nodeTagIs: nodeTagIs
|
|
1490
1289
|
});
|
|
1491
1290
|
|
|
1492
|
-
const ArcUtils = {
|
|
1493
|
-
Id,
|
|
1494
|
-
ANS,
|
|
1495
|
-
ContentElements,
|
|
1496
|
-
Section,
|
|
1497
|
-
};
|
|
1498
|
-
|
|
1499
1291
|
/**
|
|
1500
|
-
*
|
|
1501
|
-
*
|
|
1292
|
+
* HTMLProcessor is responsible for parsing HTML content into structured content elements.
|
|
1293
|
+
* It provides a flexible way to handle different HTML nodes and wrap text content.
|
|
1502
1294
|
*
|
|
1503
|
-
*
|
|
1504
|
-
*
|
|
1505
|
-
* Then you can override the specific methods to enrich the story with the data from BBC
|
|
1295
|
+
* The processor can be extended with custom handlers for specific node types and
|
|
1296
|
+
* wrappers for text content.
|
|
1506
1297
|
*
|
|
1507
|
-
*
|
|
1298
|
+
* @example
|
|
1299
|
+
* ```ts
|
|
1300
|
+
* // Create and initialize processor
|
|
1301
|
+
* const processor = new HTMLProcessor();
|
|
1302
|
+
* processor.init();
|
|
1303
|
+
*
|
|
1304
|
+
* // Parse HTML content
|
|
1305
|
+
* const html = '<div><p>Some text</p><img src="image.jpg"></div>';
|
|
1306
|
+
* const elements = await processor.parse(html);
|
|
1307
|
+
* ```
|
|
1308
|
+
*
|
|
1309
|
+
* The processor comes with built-in handlers for common HTML elements like links,
|
|
1310
|
+
* text formatting (i, u, strong), and block elements. Custom handlers can be added
|
|
1311
|
+
* using the `handle()` and `wrap()` methods.
|
|
1508
1312
|
*/
|
|
1509
|
-
class
|
|
1313
|
+
class HTMLProcessor {
|
|
1314
|
+
constructor() {
|
|
1315
|
+
this.parallelProcessing = true;
|
|
1316
|
+
this.handlers = {
|
|
1317
|
+
node: new Map(),
|
|
1318
|
+
wrap: new Map(),
|
|
1319
|
+
};
|
|
1320
|
+
}
|
|
1321
|
+
init() {
|
|
1322
|
+
// wrappers are used to wrap the content of nested text nodes
|
|
1323
|
+
// in a specific way
|
|
1324
|
+
this.wrap('link', (node, text) => {
|
|
1325
|
+
if (nodeTagIn(node, ['a'])) {
|
|
1326
|
+
const attributes = ['href', 'target', 'rel']
|
|
1327
|
+
.map((attr) => [attr, getHTMLElementAttribute(node, attr)])
|
|
1328
|
+
.filter(([_, value]) => value)
|
|
1329
|
+
.map(([key, value]) => `${key}="${value}"`)
|
|
1330
|
+
.join(' ');
|
|
1331
|
+
return {
|
|
1332
|
+
...text,
|
|
1333
|
+
content: `<a ${attributes}>${text.content}</a>`,
|
|
1334
|
+
};
|
|
1335
|
+
}
|
|
1336
|
+
});
|
|
1337
|
+
this.wrap('i', (node, text) => {
|
|
1338
|
+
if (nodeTagIn(node, ['i'])) {
|
|
1339
|
+
return {
|
|
1340
|
+
...text,
|
|
1341
|
+
content: `<i>${text.content}</i>`,
|
|
1342
|
+
};
|
|
1343
|
+
}
|
|
1344
|
+
});
|
|
1345
|
+
this.wrap('u', (node, text) => {
|
|
1346
|
+
if (nodeTagIn(node, ['u'])) {
|
|
1347
|
+
return {
|
|
1348
|
+
...text,
|
|
1349
|
+
content: `<u>${text.content}</u>`,
|
|
1350
|
+
};
|
|
1351
|
+
}
|
|
1352
|
+
});
|
|
1353
|
+
this.wrap('sup/sub', (node, text) => {
|
|
1354
|
+
if (nodeTagIn(node, ['sup', 'sub'])) {
|
|
1355
|
+
return {
|
|
1356
|
+
...text,
|
|
1357
|
+
content: `<mark class="${node.tagName.toLowerCase()}">${text.content}</mark>`,
|
|
1358
|
+
};
|
|
1359
|
+
}
|
|
1360
|
+
});
|
|
1361
|
+
this.wrap('strong', (node, text) => {
|
|
1362
|
+
if (nodeTagIn(node, ['strong', 'b'])) {
|
|
1363
|
+
return {
|
|
1364
|
+
...text,
|
|
1365
|
+
content: `<b>${text.content}</b>`,
|
|
1366
|
+
};
|
|
1367
|
+
}
|
|
1368
|
+
});
|
|
1369
|
+
this.wrap('center', (node, text) => {
|
|
1370
|
+
if (nodeTagIn(node, ['center'])) {
|
|
1371
|
+
return {
|
|
1372
|
+
...text,
|
|
1373
|
+
alignment: 'center',
|
|
1374
|
+
};
|
|
1375
|
+
}
|
|
1376
|
+
});
|
|
1377
|
+
this.wrap('aligned-paragraph', (node, text) => {
|
|
1378
|
+
if (nodeTagIn(node, ['p'])) {
|
|
1379
|
+
const styleAttribute = getHTMLElementAttribute(node, 'style') || '';
|
|
1380
|
+
if (!styleAttribute)
|
|
1381
|
+
return text;
|
|
1382
|
+
if (styleAttribute.includes('text-align: right;')) {
|
|
1383
|
+
return {
|
|
1384
|
+
...text,
|
|
1385
|
+
alignment: 'right',
|
|
1386
|
+
};
|
|
1387
|
+
}
|
|
1388
|
+
if (styleAttribute.includes('text-align: left;')) {
|
|
1389
|
+
return {
|
|
1390
|
+
...text,
|
|
1391
|
+
alignment: 'left',
|
|
1392
|
+
};
|
|
1393
|
+
}
|
|
1394
|
+
if (styleAttribute.includes('text-align: center;')) {
|
|
1395
|
+
return {
|
|
1396
|
+
...text,
|
|
1397
|
+
alignment: 'center',
|
|
1398
|
+
};
|
|
1399
|
+
}
|
|
1400
|
+
return text;
|
|
1401
|
+
}
|
|
1402
|
+
});
|
|
1403
|
+
// handlers are used to handle specific nodes
|
|
1404
|
+
// and return a list of content elements
|
|
1405
|
+
this.handle('default', (node) => {
|
|
1406
|
+
const noTag = isHTMLElement(node) && !node.tagName;
|
|
1407
|
+
if (noTag ||
|
|
1408
|
+
nodeTagIn(node, [
|
|
1409
|
+
'p',
|
|
1410
|
+
'a',
|
|
1411
|
+
'b',
|
|
1412
|
+
'sup',
|
|
1413
|
+
'sub',
|
|
1414
|
+
'span',
|
|
1415
|
+
'strong',
|
|
1416
|
+
'em',
|
|
1417
|
+
'i',
|
|
1418
|
+
'u',
|
|
1419
|
+
'section',
|
|
1420
|
+
'main',
|
|
1421
|
+
'div',
|
|
1422
|
+
'li',
|
|
1423
|
+
'center',
|
|
1424
|
+
])) {
|
|
1425
|
+
return this.handleNested(node);
|
|
1426
|
+
}
|
|
1427
|
+
});
|
|
1428
|
+
this.handle('headers', (node) => {
|
|
1429
|
+
if (nodeTagIn(node, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
|
|
1430
|
+
return this.createHeader(node);
|
|
1431
|
+
}
|
|
1432
|
+
});
|
|
1433
|
+
this.handle('text', (node) => {
|
|
1434
|
+
if (isTextNode$1(node)) {
|
|
1435
|
+
return this.createText(node);
|
|
1436
|
+
}
|
|
1437
|
+
});
|
|
1438
|
+
this.handle('comment', (node) => {
|
|
1439
|
+
if (isCommentNode(node)) {
|
|
1440
|
+
return this.handleComment(node);
|
|
1441
|
+
}
|
|
1442
|
+
});
|
|
1443
|
+
this.handle('list', async (node) => {
|
|
1444
|
+
if (nodeTagIn(node, ['ul', 'ol'])) {
|
|
1445
|
+
const listType = node.tagName === 'UL' ? 'unordered' : 'ordered';
|
|
1446
|
+
return this.createList(node, listType);
|
|
1447
|
+
}
|
|
1448
|
+
});
|
|
1449
|
+
this.handle('table', (node) => {
|
|
1450
|
+
if (nodeTagIs(node, 'table')) {
|
|
1451
|
+
return this.handleTable(node);
|
|
1452
|
+
}
|
|
1453
|
+
});
|
|
1454
|
+
this.handle('iframe', (node) => {
|
|
1455
|
+
if (nodeTagIs(node, 'iframe')) {
|
|
1456
|
+
return this.handleIframe(node);
|
|
1457
|
+
}
|
|
1458
|
+
});
|
|
1459
|
+
this.handle('img', (node) => {
|
|
1460
|
+
if (nodeTagIs(node, 'img')) {
|
|
1461
|
+
return this.handleImage(node);
|
|
1462
|
+
}
|
|
1463
|
+
});
|
|
1464
|
+
this.handle('br', (node) => {
|
|
1465
|
+
if (nodeTagIs(node, 'br')) {
|
|
1466
|
+
return this.handleBreak(node);
|
|
1467
|
+
}
|
|
1468
|
+
});
|
|
1469
|
+
}
|
|
1470
|
+
handle(name, handler) {
|
|
1471
|
+
if (this.handlers.node.has(name)) {
|
|
1472
|
+
this.warn({ name }, `${name} node handler already set`);
|
|
1473
|
+
}
|
|
1474
|
+
this.handlers.node.set(name, handler);
|
|
1475
|
+
}
|
|
1476
|
+
wrap(name, handler) {
|
|
1477
|
+
if (this.handlers.wrap.has(name)) {
|
|
1478
|
+
this.warn({ name }, `${name} wrap handler already set`);
|
|
1479
|
+
}
|
|
1480
|
+
this.handlers.wrap.set(name, handler);
|
|
1481
|
+
}
|
|
1482
|
+
async parse(html) {
|
|
1483
|
+
const doc = parse(html, { comment: true });
|
|
1484
|
+
doc.removeWhitespace();
|
|
1485
|
+
const elements = await this.process(doc);
|
|
1486
|
+
const filtered = elements?.filter((e) => e.type !== 'divider');
|
|
1487
|
+
return filtered || [];
|
|
1488
|
+
}
|
|
1489
|
+
addTextAdditionalProperties(c, parent) {
|
|
1490
|
+
const additionalProperties = c.additional_properties || {};
|
|
1491
|
+
const parentNodeIsBlockElement = this.isBlockElement(parent);
|
|
1492
|
+
c.additional_properties = {
|
|
1493
|
+
...c.additional_properties,
|
|
1494
|
+
isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
|
|
1495
|
+
};
|
|
1496
|
+
return c;
|
|
1497
|
+
}
|
|
1498
|
+
/**
|
|
1499
|
+
* Wraps text content elements with additional properties and handlers.
|
|
1500
|
+
* This method iterates through an array of content elements and applies
|
|
1501
|
+
* wrappers to text elements.
|
|
1502
|
+
*
|
|
1503
|
+
* @param node - The HTML node containing the text elements
|
|
1504
|
+
**/
|
|
1505
|
+
wrapChildrenTextNodes(node, elements) {
|
|
1506
|
+
const wrapped = [];
|
|
1507
|
+
const wrappers = [...this.handlers.wrap.values()];
|
|
1508
|
+
for (const c of elements) {
|
|
1509
|
+
if (!isTextCE(c)) {
|
|
1510
|
+
wrapped.push(c);
|
|
1511
|
+
continue;
|
|
1512
|
+
}
|
|
1513
|
+
this.addTextAdditionalProperties(c, node);
|
|
1514
|
+
const handled = wrappers.map((wrapper) => wrapper(node, c)).find(Boolean);
|
|
1515
|
+
wrapped.push(handled || c);
|
|
1516
|
+
}
|
|
1517
|
+
return wrapped;
|
|
1518
|
+
}
|
|
1519
|
+
/**
|
|
1520
|
+
* Handles nested nodes by processing their children and merging text elements.
|
|
1521
|
+
* This method recursively processes the children of a given HTML node and
|
|
1522
|
+
* returns a list of content elements.
|
|
1523
|
+
*
|
|
1524
|
+
* @param node - The HTML node to process
|
|
1525
|
+
**/
|
|
1526
|
+
async handleNested(node) {
|
|
1527
|
+
const children = await this.processChildNodes(node);
|
|
1528
|
+
const filtered = children.filter(Boolean).flat();
|
|
1529
|
+
const merged = this.mergeParagraphs(filtered);
|
|
1530
|
+
const wrapped = this.wrapChildrenTextNodes(node, merged);
|
|
1531
|
+
return wrapped;
|
|
1532
|
+
}
|
|
1533
|
+
async processChildNodes(node) {
|
|
1534
|
+
if (this.parallelProcessing) {
|
|
1535
|
+
return await Promise.all(node.childNodes.map((child) => this.process(child)));
|
|
1536
|
+
}
|
|
1537
|
+
const children = [];
|
|
1538
|
+
for (const child of node.childNodes) {
|
|
1539
|
+
children.push(await this.process(child));
|
|
1540
|
+
}
|
|
1541
|
+
return children;
|
|
1542
|
+
}
|
|
1543
|
+
/**
|
|
1544
|
+
* Processes a single HTML node and converts it into content elements.
|
|
1545
|
+
* This method iterates through registered node handlers and attempts to process the node.
|
|
1546
|
+
* If a handler successfully processes the node, it returns an array of content elements.
|
|
1547
|
+
*
|
|
1548
|
+
* @param node - The HTML node to process
|
|
1549
|
+
* @returns Promise resolving to an array of content elements, or undefined if node cannot be processed
|
|
1550
|
+
*/
|
|
1551
|
+
async process(node) {
|
|
1552
|
+
let isKnownNode = false;
|
|
1553
|
+
const elements = [];
|
|
1554
|
+
for (const [name, handler] of this.handlers.node.entries()) {
|
|
1555
|
+
try {
|
|
1556
|
+
const result = await handler(node);
|
|
1557
|
+
if (result) {
|
|
1558
|
+
// if handler returns an array of elements, it means that the node was handled properly, even if there is no elements inside
|
|
1559
|
+
isKnownNode = true;
|
|
1560
|
+
elements.push(...result);
|
|
1561
|
+
break;
|
|
1562
|
+
}
|
|
1563
|
+
}
|
|
1564
|
+
catch (error) {
|
|
1565
|
+
this.warn({ node: node.toString(), error: error.toString(), name }, 'HandlerError');
|
|
1566
|
+
}
|
|
1567
|
+
}
|
|
1568
|
+
if (isKnownNode)
|
|
1569
|
+
return elements;
|
|
1570
|
+
this.warn({ node: node.toString() }, 'UnknownNodeError');
|
|
1571
|
+
}
|
|
1572
|
+
/**
|
|
1573
|
+
* Merges adjacent text content elements into a single paragraph.
|
|
1574
|
+
* This method iterates through an array of content elements and combines
|
|
1575
|
+
* adjacent text elements into a single paragraph.
|
|
1576
|
+
*
|
|
1577
|
+
* @param items - The array of content elements to merge
|
|
1578
|
+
**/
|
|
1579
|
+
mergeParagraphs(items) {
|
|
1580
|
+
const merged = [];
|
|
1581
|
+
let toMerge = [];
|
|
1582
|
+
const merge = () => {
|
|
1583
|
+
if (!toMerge.length)
|
|
1584
|
+
return;
|
|
1585
|
+
const paragraph = toMerge.reduce((acc, p) => {
|
|
1586
|
+
return {
|
|
1587
|
+
...p,
|
|
1588
|
+
content: acc.content + p.content,
|
|
1589
|
+
};
|
|
1590
|
+
}, { type: 'text', content: '' });
|
|
1591
|
+
merged.push(paragraph);
|
|
1592
|
+
toMerge = [];
|
|
1593
|
+
};
|
|
1594
|
+
for (let i = 0; i < items.length; i++) {
|
|
1595
|
+
const item = items[i];
|
|
1596
|
+
const isBlockElement = item.additional_properties?.isBlockElement;
|
|
1597
|
+
if (isTextCE(item) && !isBlockElement) {
|
|
1598
|
+
toMerge.push(item);
|
|
1599
|
+
}
|
|
1600
|
+
else {
|
|
1601
|
+
merge();
|
|
1602
|
+
merged.push(item);
|
|
1603
|
+
}
|
|
1604
|
+
}
|
|
1605
|
+
merge();
|
|
1606
|
+
return merged;
|
|
1607
|
+
}
|
|
1608
|
+
handleComment(_) {
|
|
1609
|
+
return [];
|
|
1610
|
+
}
|
|
1611
|
+
async handleTable(node) {
|
|
1612
|
+
return [ContentElement$1.raw_html(node.toString())];
|
|
1613
|
+
}
|
|
1614
|
+
async handleIframe(node) {
|
|
1615
|
+
return [ContentElement$1.raw_html(node.toString())];
|
|
1616
|
+
}
|
|
1617
|
+
async handleImage(node) {
|
|
1618
|
+
return [ContentElement$1.raw_html(node.toString())];
|
|
1619
|
+
}
|
|
1620
|
+
async handleBreak(_) {
|
|
1621
|
+
return [ContentElement$1.divider()];
|
|
1622
|
+
}
|
|
1623
|
+
async createQuote(node) {
|
|
1624
|
+
const items = await this.handleNested(node);
|
|
1625
|
+
return [ContentElement$1.quote(items)];
|
|
1626
|
+
}
|
|
1627
|
+
async createText(node) {
|
|
1628
|
+
const text = ContentElement$1.text(node.text);
|
|
1629
|
+
return [text];
|
|
1630
|
+
}
|
|
1631
|
+
filterListItems(items) {
|
|
1632
|
+
return items.filter((i) => ['text', 'list'].includes(i.type));
|
|
1633
|
+
}
|
|
1634
|
+
async createList(node, type) {
|
|
1635
|
+
const items = await this.handleNested(node);
|
|
1636
|
+
return [ContentElement$1.list(type, this.filterListItems(items))];
|
|
1637
|
+
}
|
|
1638
|
+
async createHeader(node) {
|
|
1639
|
+
const level = +node.tagName.split('H')[1] || 3;
|
|
1640
|
+
return [ContentElement$1.header(node.innerText, level)];
|
|
1641
|
+
}
|
|
1642
|
+
isBlockElement(node) {
|
|
1643
|
+
if (!isHTMLElement(node))
|
|
1644
|
+
return false;
|
|
1645
|
+
const defaultBlockElements = new Set(BLOCK_ELEMENT_TAGS$1);
|
|
1646
|
+
return defaultBlockElements.has(node.tagName);
|
|
1647
|
+
}
|
|
1648
|
+
warn(metadata, message) {
|
|
1649
|
+
console.warn(metadata, message);
|
|
1650
|
+
}
|
|
1651
|
+
}
|
|
1652
|
+
|
|
1653
|
+
var index$4 = /*#__PURE__*/Object.freeze({
|
|
1654
|
+
__proto__: null,
|
|
1655
|
+
Constants: html_constants,
|
|
1656
|
+
HTMLProcessor: HTMLProcessor,
|
|
1657
|
+
Utils: html_utils
|
|
1658
|
+
});
|
|
1659
|
+
|
|
1660
|
+
const BLOCK_ELEMENT_TAGS = ['paragraph', 'line', 'header', 'ul', 'ol', 'li', 'embed', 'iframe', 'table'];
|
|
1661
|
+
|
|
1662
|
+
var xml_constants = /*#__PURE__*/Object.freeze({
|
|
1663
|
+
__proto__: null,
|
|
1664
|
+
BLOCK_ELEMENT_TAGS: BLOCK_ELEMENT_TAGS
|
|
1665
|
+
});
|
|
1666
|
+
|
|
1667
|
+
const isXmlElement = (node) => {
|
|
1668
|
+
return node?.type === 'element';
|
|
1669
|
+
};
|
|
1670
|
+
const isTextNode = (node) => {
|
|
1671
|
+
return node?.type === 'text';
|
|
1672
|
+
};
|
|
1673
|
+
const nodeNameIs = (node, name) => {
|
|
1674
|
+
return isXmlElement(node) && node.name === name;
|
|
1675
|
+
};
|
|
1676
|
+
const nodeNameIn = (node, names) => {
|
|
1677
|
+
return isXmlElement(node) && names.includes(node.name);
|
|
1678
|
+
};
|
|
1679
|
+
|
|
1680
|
+
var xml_utils = /*#__PURE__*/Object.freeze({
|
|
1681
|
+
__proto__: null,
|
|
1682
|
+
isTextNode: isTextNode,
|
|
1683
|
+
isXmlElement: isXmlElement,
|
|
1684
|
+
nodeNameIn: nodeNameIn,
|
|
1685
|
+
nodeNameIs: nodeNameIs
|
|
1686
|
+
});
|
|
1687
|
+
|
|
1688
|
+
const ContentElement = ContentElement$1;
|
|
1689
|
+
class XMLProcessor {
|
|
1690
|
+
constructor() {
|
|
1691
|
+
this.handlers = {
|
|
1692
|
+
node: new Map(),
|
|
1693
|
+
wrap: new Map(),
|
|
1694
|
+
};
|
|
1695
|
+
}
|
|
1696
|
+
init() {
|
|
1697
|
+
// wrappers are used to wrap the content of nested text nodes
|
|
1698
|
+
// in a specific way
|
|
1699
|
+
this.wrap('link', (node, content) => {
|
|
1700
|
+
return `<a href="${node.attr.url || node.attr.href || '/'}">${content}</a>`;
|
|
1701
|
+
});
|
|
1702
|
+
this.wrap('header', (_node, content) => {
|
|
1703
|
+
return `<h3>${content}</h3>`;
|
|
1704
|
+
});
|
|
1705
|
+
this.wrap('emphasize', (_node, content) => {
|
|
1706
|
+
return `<i>${content}</i>`;
|
|
1707
|
+
});
|
|
1708
|
+
this.wrap('strong', (_node, content) => {
|
|
1709
|
+
return `<b>${content}</b>`;
|
|
1710
|
+
});
|
|
1711
|
+
// handlers are used to handle specific nodes
|
|
1712
|
+
// and return a list of content elements
|
|
1713
|
+
this.handle('default', (node) => {
|
|
1714
|
+
if (nodeNameIn(node, ['section', 'paragraph', 'line', 'header', 'emphasize', 'strong', 'link', 'li'])) {
|
|
1715
|
+
return this.handleNested(node);
|
|
1716
|
+
}
|
|
1717
|
+
});
|
|
1718
|
+
this.handle('text', (node) => {
|
|
1719
|
+
if (isTextNode(node)) {
|
|
1720
|
+
return [ContentElement.text(node.text)];
|
|
1721
|
+
}
|
|
1722
|
+
});
|
|
1723
|
+
this.handle('list', async (node) => {
|
|
1724
|
+
if (nodeNameIn(node, ['ul', 'ol'])) {
|
|
1725
|
+
const listType = node.name === 'ul' ? 'unordered' : 'ordered';
|
|
1726
|
+
return this.createList(node, listType);
|
|
1727
|
+
}
|
|
1728
|
+
});
|
|
1729
|
+
this.handle('table', (node) => {
|
|
1730
|
+
if (nodeNameIs(node, 'table')) {
|
|
1731
|
+
return this.handleTable(node);
|
|
1732
|
+
}
|
|
1733
|
+
});
|
|
1734
|
+
}
|
|
1735
|
+
async parse(xml) {
|
|
1736
|
+
const doc = new xmldoc.XmlDocument(xml);
|
|
1737
|
+
const elements = await this.process(doc);
|
|
1738
|
+
return elements || [];
|
|
1739
|
+
}
|
|
1740
|
+
handle(name, handler) {
|
|
1741
|
+
if (this.handlers.node.has(name)) {
|
|
1742
|
+
throw new Error(`${name} node handler already set`);
|
|
1743
|
+
}
|
|
1744
|
+
this.handlers.node.set(name, handler);
|
|
1745
|
+
}
|
|
1746
|
+
wrap(name, handler) {
|
|
1747
|
+
if (this.handlers.wrap.has(name)) {
|
|
1748
|
+
throw new Error(`${name} wrap handler already set`);
|
|
1749
|
+
}
|
|
1750
|
+
this.handlers.wrap.set(name, handler);
|
|
1751
|
+
}
|
|
1752
|
+
addTextAdditionalProperties(c, parent) {
|
|
1753
|
+
const additionalProperties = c.additional_properties || {};
|
|
1754
|
+
const parentNodeIsBlockElement = this.isBlockElement(parent);
|
|
1755
|
+
c.additional_properties = {
|
|
1756
|
+
...c.additional_properties,
|
|
1757
|
+
isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
|
|
1758
|
+
};
|
|
1759
|
+
return c;
|
|
1760
|
+
}
|
|
1761
|
+
wrapChildrenTextNodes(node, elements) {
|
|
1762
|
+
const wrapped = [];
|
|
1763
|
+
for (const c of elements) {
|
|
1764
|
+
if (!isTextCE(c)) {
|
|
1765
|
+
wrapped.push(c);
|
|
1766
|
+
continue;
|
|
1767
|
+
}
|
|
1768
|
+
this.addTextAdditionalProperties(c, node);
|
|
1769
|
+
const handler = this.handlers.wrap.get(node.name);
|
|
1770
|
+
if (handler) {
|
|
1771
|
+
wrapped.push({
|
|
1772
|
+
...c,
|
|
1773
|
+
content: handler(node, c.content),
|
|
1774
|
+
});
|
|
1775
|
+
}
|
|
1776
|
+
else {
|
|
1777
|
+
wrapped.push(c);
|
|
1778
|
+
}
|
|
1779
|
+
}
|
|
1780
|
+
return wrapped;
|
|
1781
|
+
}
|
|
1782
|
+
async handleNested(node) {
|
|
1783
|
+
const children = await Promise.all(node.children.map((child) => this.process(child)));
|
|
1784
|
+
const filtered = children.filter(Boolean).flat();
|
|
1785
|
+
const merged = this.mergeParagraphs(filtered);
|
|
1786
|
+
const wrapped = this.wrapChildrenTextNodes(node, merged);
|
|
1787
|
+
return wrapped;
|
|
1788
|
+
}
|
|
1789
|
+
async process(node) {
|
|
1790
|
+
let isKnownNode = false;
|
|
1791
|
+
const elements = [];
|
|
1792
|
+
for (const [name, handler] of this.handlers.node.entries()) {
|
|
1793
|
+
try {
|
|
1794
|
+
const result = await handler(node);
|
|
1795
|
+
if (Array.isArray(result)) {
|
|
1796
|
+
// if handler returns an array of elements, it means that the node was handled properly, even if there is no elements inside
|
|
1797
|
+
isKnownNode = true;
|
|
1798
|
+
elements.push(...result);
|
|
1799
|
+
break;
|
|
1800
|
+
}
|
|
1801
|
+
}
|
|
1802
|
+
catch (error) {
|
|
1803
|
+
this.warn({ node: node.toString(), error: error.toString(), name }, 'HandlerError');
|
|
1804
|
+
}
|
|
1805
|
+
}
|
|
1806
|
+
if (isKnownNode)
|
|
1807
|
+
return elements;
|
|
1808
|
+
this.warn({ node: node.toString(), type: node.type }, 'UnknownNodeError');
|
|
1809
|
+
}
|
|
1810
|
+
mergeParagraphs(items) {
|
|
1811
|
+
const merged = [];
|
|
1812
|
+
let toMerge = [];
|
|
1813
|
+
const merge = () => {
|
|
1814
|
+
if (!toMerge.length)
|
|
1815
|
+
return;
|
|
1816
|
+
const paragraph = toMerge.reduce((acc, p) => {
|
|
1817
|
+
return {
|
|
1818
|
+
...p,
|
|
1819
|
+
content: acc.content + p.content,
|
|
1820
|
+
};
|
|
1821
|
+
}, { type: 'text', content: '' });
|
|
1822
|
+
merged.push(paragraph);
|
|
1823
|
+
toMerge = [];
|
|
1824
|
+
};
|
|
1825
|
+
for (let i = 0; i < items.length; i++) {
|
|
1826
|
+
const item = items[i];
|
|
1827
|
+
const isBlockElement = item.additional_properties?.isBlockElement;
|
|
1828
|
+
if (isTextCE(item) && !isBlockElement) {
|
|
1829
|
+
toMerge.push(item);
|
|
1830
|
+
}
|
|
1831
|
+
else {
|
|
1832
|
+
merge();
|
|
1833
|
+
merged.push(item);
|
|
1834
|
+
}
|
|
1835
|
+
}
|
|
1836
|
+
merge();
|
|
1837
|
+
return merged;
|
|
1838
|
+
}
|
|
1839
|
+
async handleTable(node) {
|
|
1840
|
+
const html = node.toString({ html: true });
|
|
1841
|
+
return [ContentElement.raw_html(html)];
|
|
1842
|
+
}
|
|
1843
|
+
async createQuote(node) {
|
|
1844
|
+
const items = await this.handleNested(node);
|
|
1845
|
+
return [ContentElement.quote(items)];
|
|
1846
|
+
}
|
|
1847
|
+
async createList(node, type) {
|
|
1848
|
+
const items = await this.handleNested(node);
|
|
1849
|
+
return [ContentElement.list(type, items)];
|
|
1850
|
+
}
|
|
1851
|
+
getNodeInnerText(node) {
|
|
1852
|
+
return node.children.map((n) => this.htmlFromNode(n).innerText.trim());
|
|
1853
|
+
}
|
|
1854
|
+
getNodeInnerHTML(node) {
|
|
1855
|
+
return node.children.map((n) => this.htmlFromNode(n).innerHTML.trim());
|
|
1856
|
+
}
|
|
1857
|
+
htmlFromNode(node) {
|
|
1858
|
+
return parse(node.toString({ html: true }));
|
|
1859
|
+
}
|
|
1860
|
+
getDecodedHTMLFromInnerNodes(node) {
|
|
1861
|
+
const encodedHtml = this.getNodeInnerText(node).join('');
|
|
1862
|
+
const decoded = decodeHTMLEntities(encodedHtml);
|
|
1863
|
+
return decoded;
|
|
1864
|
+
}
|
|
1865
|
+
isBlockElement(node) {
|
|
1866
|
+
const defaultBlockElements = new Set(BLOCK_ELEMENT_TAGS);
|
|
1867
|
+
if (defaultBlockElements.has(node.name))
|
|
1868
|
+
return true;
|
|
1869
|
+
}
|
|
1870
|
+
warn(metadata, message) {
|
|
1871
|
+
console.warn(metadata, message);
|
|
1872
|
+
}
|
|
1873
|
+
}
|
|
1874
|
+
|
|
1875
|
+
var index$3 = /*#__PURE__*/Object.freeze({
|
|
1876
|
+
__proto__: null,
|
|
1877
|
+
Constants: xml_constants,
|
|
1878
|
+
Utils: xml_utils,
|
|
1879
|
+
XMLProcessor: XMLProcessor
|
|
1880
|
+
});
|
|
1881
|
+
|
|
1882
|
+
var index$2 = /*#__PURE__*/Object.freeze({
|
|
1883
|
+
__proto__: null,
|
|
1884
|
+
ContentElement: ContentElement$1,
|
|
1885
|
+
HTML: index$4,
|
|
1886
|
+
XML: index$3
|
|
1887
|
+
});
|
|
1888
|
+
|
|
1889
|
+
/**
|
|
1890
|
+
* Base class for all arc entities, it provides common methods and properties
|
|
1891
|
+
* If you want to create a new entity subtype you should extend this class
|
|
1892
|
+
*
|
|
1893
|
+
* Use case: You want to migrate stories from BBC
|
|
1894
|
+
* You define `class BBCStory extends ArcDocument<ANS.AStory>` and implement all abstract methods
|
|
1895
|
+
* Then you can override the specific methods to enrich the story with the data from BBC
|
|
1896
|
+
*
|
|
1897
|
+
* To migrate it call .migrate() method
|
|
1898
|
+
*/
|
|
1899
|
+
class Document {
|
|
1510
1900
|
constructor() {
|
|
1511
1901
|
this.ans = null;
|
|
1512
1902
|
this.circulations = [];
|
|
@@ -1744,473 +2134,307 @@ class Story extends Document {
|
|
|
1744
2134
|
}
|
|
1745
2135
|
}
|
|
1746
2136
|
|
|
1747
|
-
var index$
|
|
2137
|
+
var index$1 = /*#__PURE__*/Object.freeze({
|
|
1748
2138
|
__proto__: null,
|
|
1749
2139
|
Document: Document,
|
|
1750
2140
|
Story: Story
|
|
1751
2141
|
});
|
|
1752
2142
|
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
];
|
|
2143
|
+
var ANSType;
|
|
2144
|
+
(function (ANSType) {
|
|
2145
|
+
ANSType["Story"] = "story";
|
|
2146
|
+
ANSType["Video"] = "video";
|
|
2147
|
+
ANSType["Tag"] = "tag";
|
|
2148
|
+
ANSType["Author"] = "author";
|
|
2149
|
+
ANSType["Gallery"] = "gallery";
|
|
2150
|
+
ANSType["Image"] = "image";
|
|
2151
|
+
ANSType["Redirect"] = "redirect";
|
|
2152
|
+
})(ANSType || (ANSType = {}));
|
|
2153
|
+
var MigrationStatus;
|
|
2154
|
+
(function (MigrationStatus) {
|
|
2155
|
+
MigrationStatus["Success"] = "Success";
|
|
2156
|
+
MigrationStatus["Queued"] = "Queued";
|
|
2157
|
+
MigrationStatus["Circulated"] = "Circulated";
|
|
2158
|
+
MigrationStatus["Published"] = "Published";
|
|
2159
|
+
MigrationStatus["Scheduled"] = "Scheduled";
|
|
2160
|
+
MigrationStatus["FailVideo"] = "FailVideo";
|
|
2161
|
+
MigrationStatus["FailImage"] = "FailImage";
|
|
2162
|
+
MigrationStatus["FailPhoto"] = "FailPhoto";
|
|
2163
|
+
MigrationStatus["FailStory"] = "FailStory";
|
|
2164
|
+
MigrationStatus["FailGallery"] = "FailGallery";
|
|
2165
|
+
MigrationStatus["FailAuthor"] = "FailAuthor";
|
|
2166
|
+
MigrationStatus["FailTag"] = "FailTag";
|
|
2167
|
+
MigrationStatus["ValidationFailed"] = "ValidationFailed";
|
|
2168
|
+
})(MigrationStatus || (MigrationStatus = {}));
|
|
2169
|
+
var SummarySortBy;
|
|
2170
|
+
(function (SummarySortBy) {
|
|
2171
|
+
SummarySortBy["CreateDate"] = "createDate";
|
|
2172
|
+
SummarySortBy["UpdateDate"] = "updateDate";
|
|
2173
|
+
SummarySortBy["Id"] = "id";
|
|
2174
|
+
})(SummarySortBy || (SummarySortBy = {}));
|
|
2175
|
+
var SummarySortOrder;
|
|
2176
|
+
(function (SummarySortOrder) {
|
|
2177
|
+
SummarySortOrder["ASC"] = "ASC";
|
|
2178
|
+
SummarySortOrder["DESC"] = "DESC";
|
|
2179
|
+
})(SummarySortOrder || (SummarySortOrder = {}));
|
|
1789
2180
|
|
|
1790
|
-
|
|
2181
|
+
/* eslint-disable */
|
|
2182
|
+
/**
|
|
2183
|
+
* This file was automatically generated by json-schema-to-typescript.
|
|
2184
|
+
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
|
|
2185
|
+
* and run json-schema-to-typescript to regenerate this file.
|
|
2186
|
+
*/
|
|
2187
|
+
|
|
2188
|
+
var ansTypes = /*#__PURE__*/Object.freeze({
|
|
2189
|
+
__proto__: null
|
|
2190
|
+
});
|
|
2191
|
+
|
|
2192
|
+
var utils = /*#__PURE__*/Object.freeze({
|
|
2193
|
+
__proto__: null
|
|
2194
|
+
});
|
|
2195
|
+
|
|
2196
|
+
var index = /*#__PURE__*/Object.freeze({
|
|
1791
2197
|
__proto__: null,
|
|
1792
|
-
|
|
2198
|
+
ANS: ansTypes,
|
|
2199
|
+
get ANSType () { return ANSType; },
|
|
2200
|
+
get MigrationStatus () { return MigrationStatus; },
|
|
2201
|
+
get SummarySortBy () { return SummarySortBy; },
|
|
2202
|
+
get SummarySortOrder () { return SummarySortOrder; },
|
|
2203
|
+
TypeUtils: utils
|
|
1793
2204
|
});
|
|
1794
2205
|
|
|
1795
|
-
const
|
|
1796
|
-
return
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
};
|
|
1804
|
-
const nodeTagIs = (node, name) => {
|
|
1805
|
-
return isHTMLElement(node) && node.tagName?.toLowerCase() === name.toLowerCase();
|
|
1806
|
-
};
|
|
1807
|
-
const nodeTagIn = (node, names) => {
|
|
1808
|
-
return isHTMLElement(node) && names.includes(node.tagName?.toLowerCase());
|
|
1809
|
-
};
|
|
1810
|
-
const isTextCE = (ce) => {
|
|
1811
|
-
return ce?.type === 'text';
|
|
1812
|
-
};
|
|
1813
|
-
const decodeHTMLEntities = (str) => decode(str);
|
|
1814
|
-
const htmlToText = (html, parseOptions) => {
|
|
1815
|
-
if (!html)
|
|
1816
|
-
return '';
|
|
1817
|
-
const doc = parse(html, parseOptions);
|
|
1818
|
-
return decodeHTMLEntities(doc.innerText);
|
|
1819
|
-
};
|
|
1820
|
-
const getHTMLElementAttribute = (e, key) => {
|
|
1821
|
-
const value = e.getAttribute(key);
|
|
1822
|
-
if (value)
|
|
1823
|
-
return value;
|
|
1824
|
-
return new URLSearchParams(e.rawAttrs.replaceAll(' ', '&')).get(key);
|
|
2206
|
+
const reference = (ref) => {
|
|
2207
|
+
return {
|
|
2208
|
+
_id: ref.id,
|
|
2209
|
+
type: 'reference',
|
|
2210
|
+
referent: {
|
|
2211
|
+
...ref,
|
|
2212
|
+
},
|
|
2213
|
+
};
|
|
1825
2214
|
};
|
|
1826
2215
|
|
|
1827
|
-
var
|
|
2216
|
+
var ANS = /*#__PURE__*/Object.freeze({
|
|
1828
2217
|
__proto__: null,
|
|
1829
|
-
|
|
1830
|
-
getHTMLElementAttribute: getHTMLElementAttribute,
|
|
1831
|
-
htmlToText: htmlToText,
|
|
1832
|
-
isCommentNode: isCommentNode,
|
|
1833
|
-
isHTMLElement: isHTMLElement,
|
|
1834
|
-
isTextCE: isTextCE,
|
|
1835
|
-
isTextNode: isTextNode,
|
|
1836
|
-
nodeTagIn: nodeTagIn,
|
|
1837
|
-
nodeTagIs: nodeTagIs
|
|
2218
|
+
reference: reference
|
|
1838
2219
|
});
|
|
1839
2220
|
|
|
2221
|
+
const generateArcId = (identifier, orgHostname) => {
|
|
2222
|
+
const namespace = v5(orgHostname, v5.DNS);
|
|
2223
|
+
const buffer = v5(identifier, namespace, Buffer.alloc(16));
|
|
2224
|
+
return encode(buffer, 'RFC4648', { padding: false });
|
|
2225
|
+
};
|
|
1840
2226
|
/**
|
|
1841
|
-
*
|
|
1842
|
-
* It provides a flexible way to handle different HTML nodes and wrap text content.
|
|
1843
|
-
*
|
|
1844
|
-
* The processor can be extended with custom handlers for specific node types and
|
|
1845
|
-
* wrappers for text content.
|
|
2227
|
+
* Utility class for generating Arc IDs and source IDs
|
|
1846
2228
|
*
|
|
1847
2229
|
* @example
|
|
1848
2230
|
* ```ts
|
|
1849
|
-
*
|
|
1850
|
-
* const
|
|
1851
|
-
*
|
|
1852
|
-
*
|
|
1853
|
-
* // Parse HTML content
|
|
1854
|
-
* const html = '<div><p>Some text</p><img src="image.jpg"></div>';
|
|
1855
|
-
* const elements = await processor.parse(html);
|
|
2231
|
+
* const generator = new IdGenerator(['my-org']);
|
|
2232
|
+
* const arcId = generator.getArcId('123'); // Generates a unique for 'my-org' Arc ID
|
|
2233
|
+
* const sourceId = generator.getSourceId('123', ['my-site']); // Generates 'my-site-123'
|
|
1856
2234
|
* ```
|
|
1857
|
-
*
|
|
1858
|
-
* The processor comes with built-in handlers for common HTML elements like links,
|
|
1859
|
-
* text formatting (i, u, strong), and block elements. Custom handlers can be added
|
|
1860
|
-
* using the `handle()` and `wrap()` methods.
|
|
1861
2235
|
*/
|
|
1862
|
-
class
|
|
1863
|
-
constructor() {
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
node: new Map(),
|
|
1867
|
-
wrap: new Map(),
|
|
1868
|
-
};
|
|
1869
|
-
}
|
|
1870
|
-
init() {
|
|
1871
|
-
// wrappers are used to wrap the content of nested text nodes
|
|
1872
|
-
// in a specific way
|
|
1873
|
-
this.wrap('link', (node, text) => {
|
|
1874
|
-
if (nodeTagIn(node, ['a'])) {
|
|
1875
|
-
const attributes = ['href', 'target', 'rel']
|
|
1876
|
-
.map((attr) => [attr, getHTMLElementAttribute(node, attr)])
|
|
1877
|
-
.filter(([_, value]) => value)
|
|
1878
|
-
.map(([key, value]) => `${key}="${value}"`)
|
|
1879
|
-
.join(' ');
|
|
1880
|
-
return {
|
|
1881
|
-
...text,
|
|
1882
|
-
content: `<a ${attributes}>${text.content}</a>`,
|
|
1883
|
-
};
|
|
1884
|
-
}
|
|
1885
|
-
});
|
|
1886
|
-
this.wrap('i', (node, text) => {
|
|
1887
|
-
if (nodeTagIn(node, ['i'])) {
|
|
1888
|
-
return {
|
|
1889
|
-
...text,
|
|
1890
|
-
content: `<i>${text.content}</i>`,
|
|
1891
|
-
};
|
|
1892
|
-
}
|
|
1893
|
-
});
|
|
1894
|
-
this.wrap('u', (node, text) => {
|
|
1895
|
-
if (nodeTagIn(node, ['u'])) {
|
|
1896
|
-
return {
|
|
1897
|
-
...text,
|
|
1898
|
-
content: `<u>${text.content}</u>`,
|
|
1899
|
-
};
|
|
1900
|
-
}
|
|
1901
|
-
});
|
|
1902
|
-
this.wrap('sup/sub', (node, text) => {
|
|
1903
|
-
if (nodeTagIn(node, ['sup', 'sub'])) {
|
|
1904
|
-
return {
|
|
1905
|
-
...text,
|
|
1906
|
-
content: `<mark class="${node.tagName.toLowerCase()}">${text.content}</mark>`,
|
|
1907
|
-
};
|
|
1908
|
-
}
|
|
1909
|
-
});
|
|
1910
|
-
this.wrap('strong', (node, text) => {
|
|
1911
|
-
if (nodeTagIn(node, ['strong', 'b'])) {
|
|
1912
|
-
return {
|
|
1913
|
-
...text,
|
|
1914
|
-
content: `<b>${text.content}</b>`,
|
|
1915
|
-
};
|
|
1916
|
-
}
|
|
1917
|
-
});
|
|
1918
|
-
this.wrap('center', (node, text) => {
|
|
1919
|
-
if (nodeTagIn(node, ['center'])) {
|
|
1920
|
-
return {
|
|
1921
|
-
...text,
|
|
1922
|
-
alignment: 'center',
|
|
1923
|
-
};
|
|
1924
|
-
}
|
|
1925
|
-
});
|
|
1926
|
-
this.wrap('aligned-paragraph', (node, text) => {
|
|
1927
|
-
if (nodeTagIn(node, ['p'])) {
|
|
1928
|
-
const styleAttribute = getHTMLElementAttribute(node, 'style') || '';
|
|
1929
|
-
if (!styleAttribute)
|
|
1930
|
-
return text;
|
|
1931
|
-
if (styleAttribute.includes('text-align: right;')) {
|
|
1932
|
-
return {
|
|
1933
|
-
...text,
|
|
1934
|
-
alignment: 'right',
|
|
1935
|
-
};
|
|
1936
|
-
}
|
|
1937
|
-
if (styleAttribute.includes('text-align: left;')) {
|
|
1938
|
-
return {
|
|
1939
|
-
...text,
|
|
1940
|
-
alignment: 'left',
|
|
1941
|
-
};
|
|
1942
|
-
}
|
|
1943
|
-
if (styleAttribute.includes('text-align: center;')) {
|
|
1944
|
-
return {
|
|
1945
|
-
...text,
|
|
1946
|
-
alignment: 'center',
|
|
1947
|
-
};
|
|
1948
|
-
}
|
|
1949
|
-
return text;
|
|
1950
|
-
}
|
|
1951
|
-
});
|
|
1952
|
-
// handlers are used to handle specific nodes
|
|
1953
|
-
// and return a list of content elements
|
|
1954
|
-
this.handle('default', (node) => {
|
|
1955
|
-
const noTag = isHTMLElement(node) && !node.tagName;
|
|
1956
|
-
if (noTag ||
|
|
1957
|
-
nodeTagIn(node, [
|
|
1958
|
-
'p',
|
|
1959
|
-
'a',
|
|
1960
|
-
'b',
|
|
1961
|
-
'sup',
|
|
1962
|
-
'sub',
|
|
1963
|
-
'span',
|
|
1964
|
-
'strong',
|
|
1965
|
-
'em',
|
|
1966
|
-
'i',
|
|
1967
|
-
'u',
|
|
1968
|
-
'section',
|
|
1969
|
-
'main',
|
|
1970
|
-
'div',
|
|
1971
|
-
'li',
|
|
1972
|
-
'center',
|
|
1973
|
-
])) {
|
|
1974
|
-
return this.handleNested(node);
|
|
1975
|
-
}
|
|
1976
|
-
});
|
|
1977
|
-
this.handle('headers', (node) => {
|
|
1978
|
-
if (nodeTagIn(node, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
|
|
1979
|
-
return this.createHeader(node);
|
|
1980
|
-
}
|
|
1981
|
-
});
|
|
1982
|
-
this.handle('text', (node) => {
|
|
1983
|
-
if (isTextNode(node)) {
|
|
1984
|
-
return this.createText(node);
|
|
1985
|
-
}
|
|
1986
|
-
});
|
|
1987
|
-
this.handle('comment', (node) => {
|
|
1988
|
-
if (isCommentNode(node)) {
|
|
1989
|
-
return this.handleComment(node);
|
|
1990
|
-
}
|
|
1991
|
-
});
|
|
1992
|
-
this.handle('list', async (node) => {
|
|
1993
|
-
if (nodeTagIn(node, ['ul', 'ol'])) {
|
|
1994
|
-
const listType = node.tagName === 'UL' ? 'unordered' : 'ordered';
|
|
1995
|
-
return this.createList(node, listType);
|
|
1996
|
-
}
|
|
1997
|
-
});
|
|
1998
|
-
this.handle('table', (node) => {
|
|
1999
|
-
if (nodeTagIs(node, 'table')) {
|
|
2000
|
-
return this.handleTable(node);
|
|
2001
|
-
}
|
|
2002
|
-
});
|
|
2003
|
-
this.handle('iframe', (node) => {
|
|
2004
|
-
if (nodeTagIs(node, 'iframe')) {
|
|
2005
|
-
return this.handleIframe(node);
|
|
2006
|
-
}
|
|
2007
|
-
});
|
|
2008
|
-
this.handle('img', (node) => {
|
|
2009
|
-
if (nodeTagIs(node, 'img')) {
|
|
2010
|
-
return this.handleImage(node);
|
|
2011
|
-
}
|
|
2012
|
-
});
|
|
2013
|
-
this.handle('br', (node) => {
|
|
2014
|
-
if (nodeTagIs(node, 'br')) {
|
|
2015
|
-
return this.handleBreak(node);
|
|
2016
|
-
}
|
|
2017
|
-
});
|
|
2018
|
-
}
|
|
2019
|
-
handle(name, handler) {
|
|
2020
|
-
if (this.handlers.node.has(name)) {
|
|
2021
|
-
this.warn({ name }, `${name} node handler already set`);
|
|
2022
|
-
}
|
|
2023
|
-
this.handlers.node.set(name, handler);
|
|
2024
|
-
}
|
|
2025
|
-
wrap(name, handler) {
|
|
2026
|
-
if (this.handlers.wrap.has(name)) {
|
|
2027
|
-
this.warn({ name }, `${name} wrap handler already set`);
|
|
2236
|
+
class IdGenerator {
|
|
2237
|
+
constructor(namespaces) {
|
|
2238
|
+
if (!namespaces.length) {
|
|
2239
|
+
throw new Error('At least 1 namespace is required');
|
|
2028
2240
|
}
|
|
2029
|
-
this.
|
|
2241
|
+
this.namespace = namespaces.join('-');
|
|
2030
2242
|
}
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
doc.removeWhitespace();
|
|
2034
|
-
const elements = await this.process(doc);
|
|
2035
|
-
const filtered = elements?.filter((e) => e.type !== 'divider');
|
|
2036
|
-
return filtered || [];
|
|
2243
|
+
getArcId(id) {
|
|
2244
|
+
return generateArcId(id.toString(), this.namespace);
|
|
2037
2245
|
}
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
const parentNodeIsBlockElement = this.isBlockElement(parent);
|
|
2041
|
-
c.additional_properties = {
|
|
2042
|
-
...c.additional_properties,
|
|
2043
|
-
isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
|
|
2044
|
-
};
|
|
2045
|
-
return c;
|
|
2246
|
+
getSourceId(id, prefixes = []) {
|
|
2247
|
+
return [...prefixes, id].join('-');
|
|
2046
2248
|
}
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2249
|
+
}
|
|
2250
|
+
|
|
2251
|
+
var Id = /*#__PURE__*/Object.freeze({
|
|
2252
|
+
__proto__: null,
|
|
2253
|
+
IdGenerator: IdGenerator,
|
|
2254
|
+
generateArcId: generateArcId
|
|
2255
|
+
});
|
|
2256
|
+
|
|
2257
|
+
const buildTree = (items) => {
|
|
2258
|
+
const tree = [
|
|
2259
|
+
{
|
|
2260
|
+
id: '/',
|
|
2261
|
+
children: [],
|
|
2262
|
+
meta: new Proxy({}, {
|
|
2263
|
+
get: () => {
|
|
2264
|
+
throw new Error('Root node meta is not accessible');
|
|
2265
|
+
},
|
|
2266
|
+
}),
|
|
2267
|
+
parent: null,
|
|
2268
|
+
},
|
|
2269
|
+
];
|
|
2270
|
+
// Track nodes at each level to maintain parent-child relationships
|
|
2271
|
+
// stores last node at each level
|
|
2272
|
+
const currLevelNodes = {
|
|
2273
|
+
0: tree[0],
|
|
2274
|
+
};
|
|
2275
|
+
for (const item of items) {
|
|
2276
|
+
const node = {
|
|
2277
|
+
id: item.id,
|
|
2278
|
+
parent: null,
|
|
2279
|
+
children: [],
|
|
2280
|
+
meta: item,
|
|
2281
|
+
};
|
|
2282
|
+
// Determine the level of this node
|
|
2283
|
+
const levelKey = Object.keys(item).find((key) => key.startsWith('N') && item[key]);
|
|
2284
|
+
const level = Number(levelKey?.replace('N', '')) || 0;
|
|
2285
|
+
if (!level) {
|
|
2286
|
+
throw new Error(`Invalid level for section ${item.id}`);
|
|
2065
2287
|
}
|
|
2066
|
-
|
|
2288
|
+
// This is a child node - attach to its parent
|
|
2289
|
+
const parentLevel = level - 1;
|
|
2290
|
+
const parentNode = currLevelNodes[parentLevel];
|
|
2291
|
+
if (parentNode) {
|
|
2292
|
+
node.parent = parentNode;
|
|
2293
|
+
parentNode.children.push(node);
|
|
2294
|
+
}
|
|
2295
|
+
else {
|
|
2296
|
+
throw new Error(`Parent node not found for section ${item.id}`);
|
|
2297
|
+
}
|
|
2298
|
+
// Set this as the current node for its level
|
|
2299
|
+
currLevelNodes[level] = node;
|
|
2067
2300
|
}
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2301
|
+
// return root nodes children
|
|
2302
|
+
return tree[0].children;
|
|
2303
|
+
};
|
|
2304
|
+
const flattenTree = (tree) => {
|
|
2305
|
+
const flatten = [];
|
|
2306
|
+
const traverse = (node) => {
|
|
2307
|
+
flatten.push(node);
|
|
2308
|
+
for (const child of node.children) {
|
|
2309
|
+
traverse(child);
|
|
2310
|
+
}
|
|
2311
|
+
};
|
|
2312
|
+
// traverse all root nodes and their children
|
|
2313
|
+
for (const node of tree) {
|
|
2314
|
+
traverse(node);
|
|
2081
2315
|
}
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2316
|
+
return flatten;
|
|
2317
|
+
};
|
|
2318
|
+
const buildAndFlattenTree = (items) => flattenTree(buildTree(items));
|
|
2319
|
+
const groupByWebsites = (sections) => {
|
|
2320
|
+
return sections.reduce((acc, section) => {
|
|
2321
|
+
const website = section._website;
|
|
2322
|
+
if (!acc[website])
|
|
2323
|
+
acc[website] = [];
|
|
2324
|
+
acc[website].push(section);
|
|
2325
|
+
return acc;
|
|
2326
|
+
}, {});
|
|
2327
|
+
};
|
|
2328
|
+
const references = (sections) => {
|
|
2329
|
+
return sections.map((s) => reference({
|
|
2330
|
+
id: s._id,
|
|
2331
|
+
website: s._website,
|
|
2332
|
+
type: 'section',
|
|
2333
|
+
}));
|
|
2334
|
+
};
|
|
2335
|
+
const isReference = (section) => {
|
|
2336
|
+
return section?.type === 'reference' && section?.referent?.type === 'section';
|
|
2337
|
+
};
|
|
2338
|
+
const removeDuplicates = (sections) => {
|
|
2339
|
+
const map = new Map();
|
|
2340
|
+
sections.forEach((s) => {
|
|
2341
|
+
if (isReference(s)) {
|
|
2342
|
+
map.set(`${s.referent.id}${s.referent.website}`, s);
|
|
2085
2343
|
}
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
children.push(await this.process(child));
|
|
2344
|
+
else {
|
|
2345
|
+
map.set(`${s._id}${s._website}`, s);
|
|
2089
2346
|
}
|
|
2090
|
-
|
|
2347
|
+
});
|
|
2348
|
+
return [...map.values()];
|
|
2349
|
+
};
|
|
2350
|
+
class SectionsRepository {
|
|
2351
|
+
constructor(arc) {
|
|
2352
|
+
this.arc = arc;
|
|
2353
|
+
this.sectionsByWebsite = {};
|
|
2354
|
+
this.websitesAreLoaded = false;
|
|
2091
2355
|
}
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
*
|
|
2097
|
-
* @param node - The HTML node to process
|
|
2098
|
-
* @returns Promise resolving to an array of content elements, or undefined if node cannot be processed
|
|
2099
|
-
*/
|
|
2100
|
-
async process(node) {
|
|
2101
|
-
let isKnownNode = false;
|
|
2102
|
-
const elements = [];
|
|
2103
|
-
for (const [name, handler] of this.handlers.node.entries()) {
|
|
2104
|
-
try {
|
|
2105
|
-
const result = await handler(node);
|
|
2106
|
-
if (result) {
|
|
2107
|
-
// if handler returns an array of elements, it means that the node was handled properly, even if there is no elements inside
|
|
2108
|
-
isKnownNode = true;
|
|
2109
|
-
elements.push(...result);
|
|
2110
|
-
break;
|
|
2111
|
-
}
|
|
2112
|
-
}
|
|
2113
|
-
catch (error) {
|
|
2114
|
-
this.warn({ node: node.toString(), error: error.toString(), name }, 'HandlerError');
|
|
2115
|
-
}
|
|
2116
|
-
}
|
|
2117
|
-
if (isKnownNode)
|
|
2118
|
-
return elements;
|
|
2119
|
-
this.warn({ node: node.toString() }, 'UnknownNodeError');
|
|
2356
|
+
async put(ans) {
|
|
2357
|
+
await this.arc.Site.putSection(ans);
|
|
2358
|
+
const created = await this.arc.Site.getSection(ans._id, ans.website);
|
|
2359
|
+
this.save(created);
|
|
2120
2360
|
}
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2132
|
-
if (!toMerge.length)
|
|
2133
|
-
return;
|
|
2134
|
-
const paragraph = toMerge.reduce((acc, p) => {
|
|
2135
|
-
return {
|
|
2136
|
-
...p,
|
|
2137
|
-
content: acc.content + p.content,
|
|
2138
|
-
};
|
|
2139
|
-
}, { type: 'text', content: '' });
|
|
2140
|
-
merged.push(paragraph);
|
|
2141
|
-
toMerge = [];
|
|
2142
|
-
};
|
|
2143
|
-
for (let i = 0; i < items.length; i++) {
|
|
2144
|
-
const item = items[i];
|
|
2145
|
-
const isBlockElement = item.additional_properties?.isBlockElement;
|
|
2146
|
-
if (isTextCE(item) && !isBlockElement) {
|
|
2147
|
-
toMerge.push(item);
|
|
2361
|
+
async loadWebsite(website) {
|
|
2362
|
+
const sections = [];
|
|
2363
|
+
let next = true;
|
|
2364
|
+
let offset = 0;
|
|
2365
|
+
while (next) {
|
|
2366
|
+
const migrated = await this.arc.Site.getSections({ website, offset }).catch((_) => {
|
|
2367
|
+
return { q_results: [] };
|
|
2368
|
+
});
|
|
2369
|
+
if (migrated.q_results.length) {
|
|
2370
|
+
sections.push(...migrated.q_results);
|
|
2371
|
+
offset += migrated.q_results.length;
|
|
2148
2372
|
}
|
|
2149
2373
|
else {
|
|
2150
|
-
|
|
2151
|
-
merged.push(item);
|
|
2374
|
+
next = false;
|
|
2152
2375
|
}
|
|
2153
2376
|
}
|
|
2154
|
-
|
|
2155
|
-
return merged;
|
|
2156
|
-
}
|
|
2157
|
-
handleComment(_) {
|
|
2158
|
-
return [];
|
|
2159
|
-
}
|
|
2160
|
-
async handleTable(node) {
|
|
2161
|
-
return [ContentElement.raw_html(node.toString())];
|
|
2162
|
-
}
|
|
2163
|
-
async handleIframe(node) {
|
|
2164
|
-
return [ContentElement.raw_html(node.toString())];
|
|
2165
|
-
}
|
|
2166
|
-
async handleImage(node) {
|
|
2167
|
-
return [ContentElement.raw_html(node.toString())];
|
|
2168
|
-
}
|
|
2169
|
-
async handleBreak(_) {
|
|
2170
|
-
return [ContentElement.divider()];
|
|
2171
|
-
}
|
|
2172
|
-
async createQuote(node) {
|
|
2173
|
-
const items = await this.handleNested(node);
|
|
2174
|
-
return [ContentElement.quote(items)];
|
|
2377
|
+
return sections;
|
|
2175
2378
|
}
|
|
2176
|
-
async
|
|
2177
|
-
const
|
|
2178
|
-
|
|
2379
|
+
async loadWebsites(websites) {
|
|
2380
|
+
for (const website of websites) {
|
|
2381
|
+
this.sectionsByWebsite[website] = await this.loadWebsite(website);
|
|
2382
|
+
}
|
|
2383
|
+
this.websitesAreLoaded = true;
|
|
2179
2384
|
}
|
|
2180
|
-
|
|
2181
|
-
|
|
2385
|
+
save(section) {
|
|
2386
|
+
const website = section._website;
|
|
2387
|
+
assert.ok(website, 'Section must have a website');
|
|
2388
|
+
this.sectionsByWebsite[website] = this.sectionsByWebsite[website] || [];
|
|
2389
|
+
if (!this.sectionsByWebsite[website].find((s) => s._id === section._id)) {
|
|
2390
|
+
this.sectionsByWebsite[website].push(section);
|
|
2391
|
+
}
|
|
2182
2392
|
}
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2393
|
+
getById(id, website) {
|
|
2394
|
+
this.ensureWebsitesLoaded();
|
|
2395
|
+
const section = this.sectionsByWebsite[website]?.find((s) => s._id === id);
|
|
2396
|
+
return section;
|
|
2186
2397
|
}
|
|
2187
|
-
|
|
2188
|
-
|
|
2189
|
-
return [
|
|
2398
|
+
getByWebsite(website) {
|
|
2399
|
+
this.ensureWebsitesLoaded();
|
|
2400
|
+
return this.sectionsByWebsite[website];
|
|
2190
2401
|
}
|
|
2191
|
-
|
|
2192
|
-
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
|
|
2402
|
+
getParentSections(section) {
|
|
2403
|
+
this.ensureWebsitesLoaded();
|
|
2404
|
+
const parents = [];
|
|
2405
|
+
let current = section;
|
|
2406
|
+
while (current.parent?.default && current.parent.default !== '/') {
|
|
2407
|
+
const parent = this.getById(current.parent.default, section._website);
|
|
2408
|
+
if (!parent)
|
|
2409
|
+
break;
|
|
2410
|
+
parents.push(parent);
|
|
2411
|
+
current = parent;
|
|
2412
|
+
}
|
|
2413
|
+
return parents;
|
|
2196
2414
|
}
|
|
2197
|
-
|
|
2198
|
-
|
|
2415
|
+
ensureWebsitesLoaded() {
|
|
2416
|
+
assert.ok(this.websitesAreLoaded, 'call .loadWebsites() first');
|
|
2199
2417
|
}
|
|
2200
2418
|
}
|
|
2201
2419
|
|
|
2202
|
-
var
|
|
2420
|
+
var Section = /*#__PURE__*/Object.freeze({
|
|
2203
2421
|
__proto__: null,
|
|
2204
|
-
|
|
2205
|
-
|
|
2206
|
-
|
|
2422
|
+
SectionsRepository: SectionsRepository,
|
|
2423
|
+
buildAndFlattenTree: buildAndFlattenTree,
|
|
2424
|
+
buildTree: buildTree,
|
|
2425
|
+
flattenTree: flattenTree,
|
|
2426
|
+
groupByWebsites: groupByWebsites,
|
|
2427
|
+
isReference: isReference,
|
|
2428
|
+
references: references,
|
|
2429
|
+
removeDuplicates: removeDuplicates
|
|
2207
2430
|
});
|
|
2208
2431
|
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2432
|
+
const ArcUtils = {
|
|
2433
|
+
Id,
|
|
2434
|
+
ANS,
|
|
2435
|
+
ContentElements,
|
|
2436
|
+
Section,
|
|
2437
|
+
};
|
|
2214
2438
|
|
|
2215
|
-
export { index$
|
|
2439
|
+
export { index$1 as AnsMapper, ArcAPI, ArcError, index as ArcTypes, ArcUtils, index$2 as ContentElements, WsClient, ArcAPI as default };
|
|
2216
2440
|
//# sourceMappingURL=index.js.map
|