@code.store/arcxp-sdk-ts 5.2.0 → 5.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -9,11 +9,12 @@ var fs = require('node:fs');
9
9
  var path = require('node:path');
10
10
  var FormData = require('form-data');
11
11
  var ws = require('ws');
12
+ var nodeHtmlParser = require('node-html-parser');
13
+ var htmlEntities = require('html-entities');
14
+ var xmldoc = require('xmldoc');
12
15
  var encode = require('base32-encode');
13
16
  var uuid = require('uuid');
14
17
  var assert = require('node:assert');
15
- var nodeHtmlParser = require('node-html-parser');
16
- var htmlEntities = require('html-entities');
17
18
 
18
19
  function _interopNamespaceDefault(e) {
19
20
  var n = Object.create(null);
@@ -34,6 +35,7 @@ function _interopNamespaceDefault(e) {
34
35
 
35
36
  var rateLimit__namespace = /*#__PURE__*/_interopNamespaceDefault(rateLimit);
36
37
  var ws__namespace = /*#__PURE__*/_interopNamespaceDefault(ws);
38
+ var xmldoc__namespace = /*#__PURE__*/_interopNamespaceDefault(xmldoc);
37
39
 
38
40
  const safeJSONStringify = (data) => {
39
41
  try {
@@ -137,28 +139,6 @@ class ArcAuthor extends ArcAbstractAPI {
137
139
  }
138
140
  }
139
141
 
140
- class ArcContentOps extends ArcAbstractAPI {
141
- constructor(options) {
142
- super({ ...options, apiPath: 'contentops/v1' });
143
- }
144
- async schedulePublish(payload) {
145
- const { data } = await this.client.put('/publish', payload);
146
- return data;
147
- }
148
- async scheduleUnpublish(payload) {
149
- const { data } = await this.client.put('/unpublish', payload);
150
- return data;
151
- }
152
- async unscheduleUnpublish(payload) {
153
- const { data } = await this.client.put('/unschedule_unpublish', payload);
154
- return data;
155
- }
156
- async unschedulePublish(payload) {
157
- const { data } = await this.client.put('/unschedule_publish', payload);
158
- return data;
159
- }
160
- }
161
-
162
142
  class ArcContent extends ArcAbstractAPI {
163
143
  constructor(options) {
164
144
  super({ ...options, apiPath: 'content/v4' });
@@ -183,6 +163,28 @@ class ArcContent extends ArcAbstractAPI {
183
163
  }
184
164
  }
185
165
 
166
+ class ArcContentOps extends ArcAbstractAPI {
167
+ constructor(options) {
168
+ super({ ...options, apiPath: 'contentops/v1' });
169
+ }
170
+ async schedulePublish(payload) {
171
+ const { data } = await this.client.put('/publish', payload);
172
+ return data;
173
+ }
174
+ async scheduleUnpublish(payload) {
175
+ const { data } = await this.client.put('/unpublish', payload);
176
+ return data;
177
+ }
178
+ async unscheduleUnpublish(payload) {
179
+ const { data } = await this.client.put('/unschedule_unpublish', payload);
180
+ return data;
181
+ }
182
+ async unschedulePublish(payload) {
183
+ const { data } = await this.client.put('/unschedule_publish', payload);
184
+ return data;
185
+ }
186
+ }
187
+
186
188
  class Custom extends ArcAbstractAPI {
187
189
  constructor(options) {
188
190
  super({ ...options, apiPath: '' });
@@ -196,6 +198,128 @@ class Custom extends ArcAbstractAPI {
196
198
  }
197
199
  }
198
200
 
201
+ class ArcDeveloperRetail extends ArcAbstractAPI {
202
+ constructor(options) {
203
+ super({ ...options, apiPath: 'retail/api/v1' });
204
+ }
205
+ // ============================================
206
+ // Product Methods
207
+ // ============================================
208
+ async getProductById(id, params) {
209
+ const { data } = await this.client.get(`/product/${id}`, { params });
210
+ return data;
211
+ }
212
+ async getProductBySku(sku, params) {
213
+ const { data } = await this.client.get(`/product/sku/${sku}`, { params });
214
+ return data;
215
+ }
216
+ async getProductByPriceCode(priceCode, params) {
217
+ const { data } = await this.client.get(`/product/pricecode/${priceCode}`, { params });
218
+ return data;
219
+ }
220
+ async getAllProducts(params) {
221
+ const { data } = await this.client.get('/product', { params });
222
+ return data;
223
+ }
224
+ // ============================================
225
+ // Pricing Strategy Methods
226
+ // ============================================
227
+ async getPricingStrategyById(id, params) {
228
+ const { data } = await this.client.get(`/pricing/strategy/${id}`, { params });
229
+ return data;
230
+ }
231
+ async getAllPricingStrategies(params) {
232
+ const { data } = await this.client.get('/pricing/strategy', { params });
233
+ return data;
234
+ }
235
+ // ============================================
236
+ // Pricing Rate Methods
237
+ // ============================================
238
+ async getPricingRateById(id, params) {
239
+ const { data } = await this.client.get(`/pricing/rate/${id}`, { params });
240
+ return data;
241
+ }
242
+ async getAllPricingRates(params) {
243
+ const { data } = await this.client.get('/pricing/rate', { params });
244
+ return data;
245
+ }
246
+ // ============================================
247
+ // Pricing Cycle Methods
248
+ // ============================================
249
+ async getPricingCycle(priceCode, cycleIndex, startDate, params) {
250
+ const { data } = await this.client.get(`/pricing/cycle/${priceCode}/${cycleIndex}/${startDate}`, {
251
+ params,
252
+ });
253
+ return data;
254
+ }
255
+ // ============================================
256
+ // Campaign Methods
257
+ // ============================================
258
+ async getCampaignById(id, params) {
259
+ const { data } = await this.client.get(`/campaign/${id}`, { params });
260
+ return data;
261
+ }
262
+ async getCampaignByName(campaignName, params) {
263
+ const { data } = await this.client.get(`/campaign/${campaignName}/get`, { params });
264
+ return data;
265
+ }
266
+ async getAllCampaigns(params) {
267
+ const { data } = await this.client.get('/campaign', { params });
268
+ return data;
269
+ }
270
+ // ============================================
271
+ // Campaign Category Methods
272
+ // ============================================
273
+ async getCampaignCategoryById(id, params) {
274
+ const { data } = await this.client.get(`/campaign/category/${id}`, { params });
275
+ return data;
276
+ }
277
+ async getAllCampaignCategories(params) {
278
+ const { data } = await this.client.get('/campaign/category', { params });
279
+ return data;
280
+ }
281
+ // ============================================
282
+ // Offer Methods
283
+ // ============================================
284
+ async getOfferById(id, params) {
285
+ const { data } = await this.client.get(`/offer/${id}`, { params });
286
+ return data;
287
+ }
288
+ async getAllOffers(params) {
289
+ const { data } = await this.client.get('/offer', { params });
290
+ return data;
291
+ }
292
+ // ============================================
293
+ // Offer Attribute Methods
294
+ // ============================================
295
+ async getOfferAttributeById(id, params) {
296
+ const { data } = await this.client.get(`/offer/attribute/${id}`, { params });
297
+ return data;
298
+ }
299
+ async getAllOfferAttributes(params) {
300
+ const { data } = await this.client.get('/offer/attribute', { params });
301
+ return data;
302
+ }
303
+ // ============================================
304
+ // Product Attribute Methods
305
+ // ============================================
306
+ async getProductAttributeById(id, params) {
307
+ const { data } = await this.client.get(`/product/attribute/${id}`, { params });
308
+ return data;
309
+ }
310
+ async getAllProductAttributes(params) {
311
+ const { data } = await this.client.get('/product/attribute', { params });
312
+ return data;
313
+ }
314
+ // ============================================
315
+ // Condition Category Methods
316
+ // ============================================
317
+ async getAllConditionCategories(params) {
318
+ const { data } = await this.client.get('/condition/categories', { params });
319
+ return data;
320
+ }
321
+ }
322
+
199
323
  class ArcDraft extends ArcAbstractAPI {
200
324
  constructor(options) {
201
325
  super({ ...options, apiPath: 'draft/v1' });
@@ -592,190 +716,68 @@ class ArcRetailEvents {
592
716
  }
593
717
  }
594
718
 
595
- class ArcDeveloperRetail extends ArcAbstractAPI {
719
+ class ArcSales extends ArcAbstractAPI {
596
720
  constructor(options) {
597
- super({ ...options, apiPath: 'retail/api/v1' });
721
+ super({ ...options, apiPath: 'sales/api/v1' });
598
722
  }
599
- // ============================================
600
- // Product Methods
601
- // ============================================
602
- async getProductById(id, params) {
603
- const { data } = await this.client.get(`/product/${id}`, { params });
723
+ async migrate(params, payload) {
724
+ const FormData = await platform.form_data();
725
+ const form = new FormData();
726
+ form.append('file', JSON.stringify(payload), { filename: 'subs.json', contentType: 'application/json' });
727
+ const { data } = await this.client.post('/migrate', form, {
728
+ params,
729
+ headers: {
730
+ ...form.getHeaders(),
731
+ },
732
+ });
604
733
  return data;
605
734
  }
606
- async getProductBySku(sku, params) {
607
- const { data } = await this.client.get(`/product/sku/${sku}`, { params });
608
- return data;
735
+ }
736
+ class ArcSalesV2 extends ArcAbstractAPI {
737
+ constructor(options) {
738
+ super({ ...options, apiPath: 'sales/api/v2' });
609
739
  }
610
- async getProductByPriceCode(priceCode, params) {
611
- const { data } = await this.client.get(`/product/pricecode/${priceCode}`, { params });
740
+ async getEnterpriseGroups(params) {
741
+ const { data } = await this.client.get('/subscriptions/enterprise', {
742
+ params: {
743
+ 'arc-site': params.site,
744
+ },
745
+ });
612
746
  return data;
613
747
  }
614
- async getAllProducts(params) {
615
- const { data } = await this.client.get('/product', { params });
748
+ async createEnterpriseGroup(params, payload) {
749
+ const { data } = await this.client.post('/subscriptions/enterprise', payload, {
750
+ params: {
751
+ 'arc-site': params.site,
752
+ },
753
+ });
616
754
  return data;
617
755
  }
618
- // ============================================
619
- // Pricing Strategy Methods
620
- // ============================================
621
- async getPricingStrategyById(id, params) {
622
- const { data } = await this.client.get(`/pricing/strategy/${id}`, { params });
756
+ async createNonce(website, enterpriseGroupId) {
757
+ const { data } = await this.client.get(`/subscriptions/enterprise/${enterpriseGroupId}`, {
758
+ params: { 'arc-site': website },
759
+ });
623
760
  return data;
624
761
  }
625
- async getAllPricingStrategies(params) {
626
- const { data } = await this.client.get('/pricing/strategy', { params });
627
- return data;
762
+ }
763
+
764
+ class ArcSigningService extends ArcAbstractAPI {
765
+ constructor(options) {
766
+ super({ ...options, apiPath: 'signing-service' });
628
767
  }
629
- // ============================================
630
- // Pricing Rate Methods
631
- // ============================================
632
- async getPricingRateById(id, params) {
633
- const { data } = await this.client.get(`/pricing/rate/${id}`, { params });
768
+ async sign(service, serviceVersion, imageId) {
769
+ const { data } = await this.client.get(`/v2/sign/${service}/${serviceVersion}?value=${encodeURI(imageId)}`);
634
770
  return data;
635
771
  }
636
- async getAllPricingRates(params) {
637
- const { data } = await this.client.get('/pricing/rate', { params });
638
- return data;
772
+ }
773
+
774
+ class ArcSite extends ArcAbstractAPI {
775
+ constructor(options) {
776
+ super({ ...options, apiPath: 'site/v3' });
639
777
  }
640
- // ============================================
641
- // Pricing Cycle Methods
642
- // ============================================
643
- async getPricingCycle(priceCode, cycleIndex, startDate, params) {
644
- const { data } = await this.client.get(`/pricing/cycle/${priceCode}/${cycleIndex}/${startDate}`, {
645
- params,
646
- });
647
- return data;
648
- }
649
- // ============================================
650
- // Campaign Methods
651
- // ============================================
652
- async getCampaignById(id, params) {
653
- const { data } = await this.client.get(`/campaign/${id}`, { params });
654
- return data;
655
- }
656
- async getCampaignByName(campaignName, params) {
657
- const { data } = await this.client.get(`/campaign/${campaignName}/get`, { params });
658
- return data;
659
- }
660
- async getAllCampaigns(params) {
661
- const { data } = await this.client.get('/campaign', { params });
662
- return data;
663
- }
664
- // ============================================
665
- // Campaign Category Methods
666
- // ============================================
667
- async getCampaignCategoryById(id, params) {
668
- const { data } = await this.client.get(`/campaign/category/${id}`, { params });
669
- return data;
670
- }
671
- async getAllCampaignCategories(params) {
672
- const { data } = await this.client.get('/campaign/category', { params });
673
- return data;
674
- }
675
- // ============================================
676
- // Offer Methods
677
- // ============================================
678
- async getOfferById(id, params) {
679
- const { data } = await this.client.get(`/offer/${id}`, { params });
680
- return data;
681
- }
682
- async getAllOffers(params) {
683
- const { data } = await this.client.get('/offer', { params });
684
- return data;
685
- }
686
- // ============================================
687
- // Offer Attribute Methods
688
- // ============================================
689
- async getOfferAttributeById(id, params) {
690
- const { data } = await this.client.get(`/offer/attribute/${id}`, { params });
691
- return data;
692
- }
693
- async getAllOfferAttributes(params) {
694
- const { data } = await this.client.get('/offer/attribute', { params });
695
- return data;
696
- }
697
- // ============================================
698
- // Product Attribute Methods
699
- // ============================================
700
- async getProductAttributeById(id, params) {
701
- const { data } = await this.client.get(`/product/attribute/${id}`, { params });
702
- return data;
703
- }
704
- async getAllProductAttributes(params) {
705
- const { data } = await this.client.get('/product/attribute', { params });
706
- return data;
707
- }
708
- // ============================================
709
- // Condition Category Methods
710
- // ============================================
711
- async getAllConditionCategories(params) {
712
- const { data } = await this.client.get('/condition/categories', { params });
713
- return data;
714
- }
715
- }
716
-
717
- class ArcSales extends ArcAbstractAPI {
718
- constructor(options) {
719
- super({ ...options, apiPath: 'sales/api/v1' });
720
- }
721
- async migrate(params, payload) {
722
- const FormData = await platform.form_data();
723
- const form = new FormData();
724
- form.append('file', JSON.stringify(payload), { filename: 'subs.json', contentType: 'application/json' });
725
- const { data } = await this.client.post('/migrate', form, {
726
- params,
727
- headers: {
728
- ...form.getHeaders(),
729
- },
730
- });
731
- return data;
732
- }
733
- }
734
- class ArcSalesV2 extends ArcAbstractAPI {
735
- constructor(options) {
736
- super({ ...options, apiPath: 'sales/api/v2' });
737
- }
738
- async getEnterpriseGroups(params) {
739
- const { data } = await this.client.get('/subscriptions/enterprise', {
740
- params: {
741
- 'arc-site': params.site,
742
- },
743
- });
744
- return data;
745
- }
746
- async createEnterpriseGroup(params, payload) {
747
- const { data } = await this.client.post('/subscriptions/enterprise', payload, {
748
- params: {
749
- 'arc-site': params.site,
750
- },
751
- });
752
- return data;
753
- }
754
- async createNonce(website, enterpriseGroupId) {
755
- const { data } = await this.client.get(`/subscriptions/enterprise/${enterpriseGroupId}`, {
756
- params: { 'arc-site': website },
757
- });
758
- return data;
759
- }
760
- }
761
-
762
- class ArcSigningService extends ArcAbstractAPI {
763
- constructor(options) {
764
- super({ ...options, apiPath: 'signing-service' });
765
- }
766
- async sign(service, serviceVersion, imageId) {
767
- const { data } = await this.client.get(`/v2/sign/${service}/${serviceVersion}?value=${encodeURI(imageId)}`);
768
- return data;
769
- }
770
- }
771
-
772
- class ArcSite extends ArcAbstractAPI {
773
- constructor(options) {
774
- super({ ...options, apiPath: 'site/v3' });
775
- }
776
- async getSections(params) {
777
- const { data } = await this.client.get(`/website/${params.website}/section`, {
778
- params: { _website: params.website, ...params },
778
+ async getSections(params) {
779
+ const { data } = await this.client.get(`/website/${params.website}/section`, {
780
+ params: { _website: params.website, ...params },
779
781
  });
780
782
  return data;
781
783
  }
@@ -913,85 +915,7 @@ const ArcAPI = (options) => {
913
915
  return API;
914
916
  };
915
917
 
916
- /* eslint-disable */
917
- /**
918
- * This file was automatically generated by json-schema-to-typescript.
919
- * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
920
- * and run json-schema-to-typescript to regenerate this file.
921
- */
922
-
923
- var ansTypes = /*#__PURE__*/Object.freeze({
924
- __proto__: null
925
- });
926
-
927
- var utils = /*#__PURE__*/Object.freeze({
928
- __proto__: null
929
- });
930
-
931
- var ANSType;
932
- (function (ANSType) {
933
- ANSType["Story"] = "story";
934
- ANSType["Video"] = "video";
935
- ANSType["Tag"] = "tag";
936
- ANSType["Author"] = "author";
937
- ANSType["Gallery"] = "gallery";
938
- ANSType["Image"] = "image";
939
- ANSType["Redirect"] = "redirect";
940
- })(ANSType || (ANSType = {}));
941
- var MigrationStatus;
942
- (function (MigrationStatus) {
943
- MigrationStatus["Success"] = "Success";
944
- MigrationStatus["Queued"] = "Queued";
945
- MigrationStatus["Circulated"] = "Circulated";
946
- MigrationStatus["Published"] = "Published";
947
- MigrationStatus["Scheduled"] = "Scheduled";
948
- MigrationStatus["FailVideo"] = "FailVideo";
949
- MigrationStatus["FailImage"] = "FailImage";
950
- MigrationStatus["FailPhoto"] = "FailPhoto";
951
- MigrationStatus["FailStory"] = "FailStory";
952
- MigrationStatus["FailGallery"] = "FailGallery";
953
- MigrationStatus["FailAuthor"] = "FailAuthor";
954
- MigrationStatus["FailTag"] = "FailTag";
955
- MigrationStatus["ValidationFailed"] = "ValidationFailed";
956
- })(MigrationStatus || (MigrationStatus = {}));
957
- var SummarySortBy;
958
- (function (SummarySortBy) {
959
- SummarySortBy["CreateDate"] = "createDate";
960
- SummarySortBy["UpdateDate"] = "updateDate";
961
- SummarySortBy["Id"] = "id";
962
- })(SummarySortBy || (SummarySortBy = {}));
963
- var SummarySortOrder;
964
- (function (SummarySortOrder) {
965
- SummarySortOrder["ASC"] = "ASC";
966
- SummarySortOrder["DESC"] = "DESC";
967
- })(SummarySortOrder || (SummarySortOrder = {}));
968
-
969
- var index$3 = /*#__PURE__*/Object.freeze({
970
- __proto__: null,
971
- ANS: ansTypes,
972
- get ANSType () { return ANSType; },
973
- get MigrationStatus () { return MigrationStatus; },
974
- get SummarySortBy () { return SummarySortBy; },
975
- get SummarySortOrder () { return SummarySortOrder; },
976
- TypeUtils: utils
977
- });
978
-
979
- const reference = (ref) => {
980
- return {
981
- _id: ref.id,
982
- type: 'reference',
983
- referent: {
984
- ...ref,
985
- },
986
- };
987
- };
988
-
989
- var ANS = /*#__PURE__*/Object.freeze({
990
- __proto__: null,
991
- reference: reference
992
- });
993
-
994
- const ContentElement = {
918
+ const ContentElement$1 = {
995
919
  divider: () => {
996
920
  return {
997
921
  type: 'divider',
@@ -1223,18 +1147,60 @@ const ContentElement = {
1223
1147
  },
1224
1148
  };
1225
1149
 
1150
+ const BLOCK_ELEMENT_TAGS$1 = [
1151
+ 'ADDRESS',
1152
+ 'ARTICLE',
1153
+ 'ASIDE',
1154
+ 'BLOCKQUOTE',
1155
+ 'DETAILS',
1156
+ 'DIV',
1157
+ 'DL',
1158
+ 'FIELDSET',
1159
+ 'FIGCAPTION',
1160
+ 'FIGURE',
1161
+ 'FOOTER',
1162
+ 'FORM',
1163
+ 'H1',
1164
+ 'H2',
1165
+ 'H3',
1166
+ 'H4',
1167
+ 'H5',
1168
+ 'H6',
1169
+ 'HEADER',
1170
+ 'HR',
1171
+ 'LINE',
1172
+ 'MAIN',
1173
+ 'MENU',
1174
+ 'NAV',
1175
+ 'OL',
1176
+ 'P',
1177
+ 'PARAGRAPH',
1178
+ 'PRE',
1179
+ 'SECTION',
1180
+ 'TABLE',
1181
+ 'UL',
1182
+ 'LI',
1183
+ 'BODY',
1184
+ 'HTML',
1185
+ ];
1186
+
1187
+ var html_constants = /*#__PURE__*/Object.freeze({
1188
+ __proto__: null,
1189
+ BLOCK_ELEMENT_TAGS: BLOCK_ELEMENT_TAGS$1
1190
+ });
1191
+
1226
1192
  const socialRegExps = {
1227
- instagram: /(?:https?:\/\/)?(?:www.)?instagram.com\/?([a-zA-Z0-9\.\_\-]+)?\/([p]+)?([reel]+)?([tv]+)?([stories]+)?\/([a-zA-Z0-9\-\_\.]+)\/?([0-9]+)?/,
1228
- twitter: /https:\/\/(?:www\.)?twitter\.com\/[^\/]+\/status(?:es)?\/(\d+)/,
1229
- tiktok: /https:\/\/(?:m|www|vm)?\.?tiktok\.com\/((?:.*\b(?:(?:usr|v|embed|user|video)\/|\?shareId=|\&item_id=)(\d+))|\w+)/,
1230
- facebookPost: /https:\/\/www\.facebook\.com\/(photo(\.php|s)|permalink\.php|media|questions|notes|[^\/]+\/(activity|posts))[\/?].*$/,
1231
- facebookVideo: /https:\/\/www\.facebook\.com\/([^\/?].+\/)?video(s|\.php)[\/?].*/,
1193
+ instagram: /(?:https?:\/\/)?(?:www.)?instagram.com\/?([a-zA-Z0-9._-]+)?\/([p]+)?([reel]+)?([tv]+)?([stories]+)?\/([a-zA-Z0-9\-_.]+)\/?([0-9]+)?/,
1194
+ twitter: /https:\/\/(?:www\.)?twitter\.com\/[^/]+\/status(?:es)?\/(\d+)/,
1195
+ tiktok: /https:\/\/(?:m|www|vm)?\.?tiktok\.com\/((?:.*\b(?:(?:usr|v|embed|user|video)\/|\?shareId=|&item_id=)(\d+))|\w+)/,
1196
+ facebookPost: /https:\/\/www\.facebook\.com\/(photo(\.php|s)|permalink\.php|media|questions|notes|[^/]+\/(activity|posts))[/?].*$/,
1197
+ facebookVideo: /https:\/\/www\.facebook\.com\/([^/?].+\/)?video(s|\.php)[/?].*/,
1232
1198
  };
1233
1199
  function match(url, regex) {
1234
1200
  return url.match(regex)?.[0];
1235
1201
  }
1236
1202
  function youtubeURLParser(url = '') {
1237
- const regExp = /(?:youtube(?:-nocookie)?\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]vi?=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/;
1203
+ const regExp = /(?:youtube(?:-nocookie)?\.com\/(?:[^/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]vi?=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/;
1238
1204
  const id = url?.match(regExp)?.[1];
1239
1205
  if (id) {
1240
1206
  return `https://youtu.be/${id}`;
@@ -1264,273 +1230,698 @@ function createSocial(url = '') {
1264
1230
  const embeds = [];
1265
1231
  const instagram = instagramURLParser(url);
1266
1232
  if (instagram) {
1267
- embeds.push(ContentElement.instagram(instagram));
1233
+ embeds.push(ContentElement$1.instagram(instagram));
1268
1234
  }
1269
1235
  const twitter = twitterURLParser(url);
1270
1236
  if (twitter) {
1271
- embeds.push(ContentElement.twitter(twitter));
1237
+ embeds.push(ContentElement$1.twitter(twitter));
1272
1238
  }
1273
1239
  const tiktok = tiktokURLParser(url);
1274
1240
  if (tiktok) {
1275
- embeds.push(ContentElement.tiktok(tiktok));
1241
+ embeds.push(ContentElement$1.tiktok(tiktok));
1276
1242
  }
1277
1243
  const youtube = youtubeURLParser(url);
1278
1244
  if (youtube) {
1279
- embeds.push(ContentElement.youtube(youtube));
1245
+ embeds.push(ContentElement$1.youtube(youtube));
1280
1246
  }
1281
1247
  const facebookPost = facebookPostURLParser(url);
1282
1248
  if (facebookPost) {
1283
- embeds.push(ContentElement.facebook_post(facebookPost));
1249
+ embeds.push(ContentElement$1.facebook_post(facebookPost));
1284
1250
  }
1285
1251
  const facebookVideo = facebookVideoURLParser(url);
1286
1252
  if (facebookVideo) {
1287
- embeds.push(ContentElement.facebook_video(facebookVideo));
1253
+ embeds.push(ContentElement$1.facebook_video(facebookVideo));
1288
1254
  }
1289
1255
  return embeds;
1290
1256
  }
1291
1257
  const randomId = () => `${new Date().toISOString()}-${Math.random()}`;
1258
+ const isTextCE = (ce) => {
1259
+ return ce?.type === 'text';
1260
+ };
1261
+ const decodeHTMLEntities = (str) => htmlEntities.decode(str);
1292
1262
 
1293
1263
  var ContentElements = /*#__PURE__*/Object.freeze({
1294
1264
  __proto__: null,
1295
1265
  createSocial: createSocial,
1266
+ decodeHTMLEntities: decodeHTMLEntities,
1296
1267
  facebookPostURLParser: facebookPostURLParser,
1297
1268
  facebookVideoURLParser: facebookVideoURLParser,
1298
1269
  instagramURLParser: instagramURLParser,
1270
+ isTextCE: isTextCE,
1299
1271
  randomId: randomId,
1300
1272
  tiktokURLParser: tiktokURLParser,
1301
1273
  twitterURLParser: twitterURLParser,
1302
1274
  youtubeURLParser: youtubeURLParser
1303
1275
  });
1304
1276
 
1305
- const generateArcId = (identifier, orgHostname) => {
1306
- const namespace = uuid.v5(orgHostname, uuid.v5.DNS);
1307
- const buffer = uuid.v5(identifier, namespace, Buffer.alloc(16));
1308
- return encode(buffer, 'RFC4648', { padding: false });
1277
+ const isTextNode$1 = (node) => {
1278
+ return node instanceof nodeHtmlParser.TextNode;
1309
1279
  };
1310
- /**
1311
- * Utility class for generating Arc IDs and source IDs
1312
- *
1313
- * @example
1314
- * ```ts
1315
- * const generator = new IdGenerator(['my-org']);
1316
- * const arcId = generator.getArcId('123'); // Generates a unique for 'my-org' Arc ID
1317
- * const sourceId = generator.getSourceId('123', ['my-site']); // Generates 'my-site-123'
1318
- * ```
1319
- */
1320
- class IdGenerator {
1321
- constructor(namespaces) {
1322
- if (!namespaces.length) {
1323
- throw new Error('At least 1 namespace is required');
1324
- }
1325
- this.namespace = namespaces.join('-');
1326
- }
1327
- getArcId(id) {
1328
- return generateArcId(id.toString(), this.namespace);
1329
- }
1330
- getSourceId(id, prefixes = []) {
1331
- return [...prefixes, id].join('-');
1332
- }
1333
- }
1334
-
1335
- var Id = /*#__PURE__*/Object.freeze({
1336
- __proto__: null,
1337
- IdGenerator: IdGenerator,
1338
- generateArcId: generateArcId
1339
- });
1340
-
1341
- const buildTree = (items) => {
1342
- const tree = [
1343
- {
1344
- id: '/',
1345
- children: [],
1346
- meta: new Proxy({}, {
1347
- get: () => {
1348
- throw new Error('Root node meta is not accessible');
1349
- },
1350
- }),
1351
- parent: null,
1352
- },
1353
- ];
1354
- // Track nodes at each level to maintain parent-child relationships
1355
- // stores last node at each level
1356
- const currLevelNodes = {
1357
- 0: tree[0],
1358
- };
1359
- for (const item of items) {
1360
- const node = {
1361
- id: item.id,
1362
- parent: null,
1363
- children: [],
1364
- meta: item,
1365
- };
1366
- // Determine the level of this node
1367
- const levelKey = Object.keys(item).find((key) => key.startsWith('N') && item[key]);
1368
- const level = Number(levelKey?.replace('N', '')) || 0;
1369
- if (!level) {
1370
- throw new Error(`Invalid level for section ${item.id}`);
1371
- }
1372
- // This is a child node - attach to its parent
1373
- const parentLevel = level - 1;
1374
- const parentNode = currLevelNodes[parentLevel];
1375
- if (parentNode) {
1376
- node.parent = parentNode;
1377
- parentNode.children.push(node);
1378
- }
1379
- else {
1380
- throw new Error(`Parent node not found for section ${item.id}`);
1381
- }
1382
- // Set this as the current node for its level
1383
- currLevelNodes[level] = node;
1384
- }
1385
- // return root nodes children
1386
- return tree[0].children;
1280
+ const isHTMLElement = (node) => {
1281
+ return node instanceof nodeHtmlParser.HTMLElement;
1387
1282
  };
1388
- const flattenTree = (tree) => {
1389
- const flatten = [];
1390
- const traverse = (node) => {
1391
- flatten.push(node);
1392
- for (const child of node.children) {
1393
- traverse(child);
1394
- }
1395
- };
1396
- // traverse all root nodes and their children
1397
- for (const node of tree) {
1398
- traverse(node);
1399
- }
1400
- return flatten;
1283
+ const isCommentNode = (node) => {
1284
+ return node instanceof nodeHtmlParser.CommentNode;
1401
1285
  };
1402
- const buildAndFlattenTree = (items) => flattenTree(buildTree(items));
1403
- const groupByWebsites = (sections) => {
1404
- return sections.reduce((acc, section) => {
1405
- const website = section._website;
1406
- if (!acc[website])
1407
- acc[website] = [];
1408
- acc[website].push(section);
1409
- return acc;
1410
- }, {});
1286
+ const nodeTagIs = (node, name) => {
1287
+ return isHTMLElement(node) && node.tagName?.toLowerCase() === name.toLowerCase();
1411
1288
  };
1412
- const references = (sections) => {
1413
- return sections.map((s) => reference({
1414
- id: s._id,
1415
- website: s._website,
1416
- type: 'section',
1417
- }));
1289
+ const nodeTagIn = (node, names) => {
1290
+ return isHTMLElement(node) && names.includes(node.tagName?.toLowerCase());
1418
1291
  };
1419
- const isReference = (section) => {
1420
- return section?.type === 'reference' && section?.referent?.type === 'section';
1292
+ const htmlToText = (html, parseOptions) => {
1293
+ if (!html)
1294
+ return '';
1295
+ const doc = nodeHtmlParser.parse(html, parseOptions);
1296
+ return decodeHTMLEntities(doc.innerText);
1421
1297
  };
1422
- const removeDuplicates = (sections) => {
1423
- const map = new Map();
1424
- sections.forEach((s) => {
1425
- if (isReference(s)) {
1426
- map.set(`${s.referent.id}${s.referent.website}`, s);
1427
- }
1428
- else {
1429
- map.set(`${s._id}${s._website}`, s);
1430
- }
1431
- });
1432
- return [...map.values()];
1298
+ const getHTMLElementAttribute = (e, key) => {
1299
+ const value = e.getAttribute(key);
1300
+ if (value)
1301
+ return value;
1302
+ return new URLSearchParams(e.rawAttrs.replaceAll(' ', '&')).get(key);
1433
1303
  };
1434
- class SectionsRepository {
1435
- constructor(arc) {
1436
- this.arc = arc;
1437
- this.sectionsByWebsite = {};
1438
- this.websitesAreLoaded = false;
1439
- }
1440
- async put(ans) {
1441
- await this.arc.Site.putSection(ans);
1442
- const created = await this.arc.Site.getSection(ans._id, ans.website);
1443
- this.save(created);
1444
- }
1445
- async loadWebsite(website) {
1446
- const sections = [];
1447
- let next = true;
1448
- let offset = 0;
1449
- while (next) {
1450
- const migrated = await this.arc.Site.getSections({ website, offset }).catch((_) => {
1451
- return { q_results: [] };
1452
- });
1453
- if (migrated.q_results.length) {
1454
- sections.push(...migrated.q_results);
1455
- offset += migrated.q_results.length;
1456
- }
1457
- else {
1458
- next = false;
1459
- }
1460
- }
1461
- return sections;
1462
- }
1463
- async loadWebsites(websites) {
1464
- for (const website of websites) {
1465
- this.sectionsByWebsite[website] = await this.loadWebsite(website);
1466
- }
1467
- this.websitesAreLoaded = true;
1468
- }
1469
- save(section) {
1470
- const website = section._website;
1471
- assert.ok(website, 'Section must have a website');
1472
- this.sectionsByWebsite[website] = this.sectionsByWebsite[website] || [];
1473
- if (!this.sectionsByWebsite[website].find((s) => s._id === section._id)) {
1474
- this.sectionsByWebsite[website].push(section);
1475
- }
1476
- }
1477
- getById(id, website) {
1478
- this.ensureWebsitesLoaded();
1479
- const section = this.sectionsByWebsite[website]?.find((s) => s._id === id);
1480
- return section;
1481
- }
1482
- getByWebsite(website) {
1483
- this.ensureWebsitesLoaded();
1484
- return this.sectionsByWebsite[website];
1485
- }
1486
- getParentSections(section) {
1487
- this.ensureWebsitesLoaded();
1488
- const parents = [];
1489
- let current = section;
1490
- while (current.parent?.default && current.parent.default !== '/') {
1491
- const parent = this.getById(current.parent.default, section._website);
1492
- if (!parent)
1493
- break;
1494
- parents.push(parent);
1495
- current = parent;
1496
- }
1497
- return parents;
1498
- }
1499
- ensureWebsitesLoaded() {
1500
- assert.ok(this.websitesAreLoaded, 'call .loadWebsites() first');
1501
- }
1502
- }
1503
1304
 
1504
- var Section = /*#__PURE__*/Object.freeze({
1305
+ var html_utils = /*#__PURE__*/Object.freeze({
1505
1306
  __proto__: null,
1506
- SectionsRepository: SectionsRepository,
1507
- buildAndFlattenTree: buildAndFlattenTree,
1508
- buildTree: buildTree,
1509
- flattenTree: flattenTree,
1510
- groupByWebsites: groupByWebsites,
1511
- isReference: isReference,
1512
- references: references,
1513
- removeDuplicates: removeDuplicates
1307
+ getHTMLElementAttribute: getHTMLElementAttribute,
1308
+ htmlToText: htmlToText,
1309
+ isCommentNode: isCommentNode,
1310
+ isHTMLElement: isHTMLElement,
1311
+ isTextNode: isTextNode$1,
1312
+ nodeTagIn: nodeTagIn,
1313
+ nodeTagIs: nodeTagIs
1514
1314
  });
1515
1315
 
1516
- const ArcUtils = {
1517
- Id,
1518
- ANS,
1519
- ContentElements,
1520
- Section,
1521
- };
1522
-
1523
1316
  /**
1524
- * Base class for all arc entities, it provides common methods and properties
1525
- * If you want to create a new entity subtype you should extend this class
1317
+ * HTMLProcessor is responsible for parsing HTML content into structured content elements.
1318
+ * It provides a flexible way to handle different HTML nodes and wrap text content.
1526
1319
  *
1527
- * Use case: You want to migrate stories from BBC
1528
- * You define `class BBCStory extends ArcDocument<ANS.AStory>` and implement all abstract methods
1529
- * Then you can override the specific methods to enrich the story with the data from BBC
1320
+ * The processor can be extended with custom handlers for specific node types and
1321
+ * wrappers for text content.
1530
1322
  *
1531
- * To migrate it call .migrate() method
1323
+ * @example
1324
+ * ```ts
1325
+ * // Create and initialize processor
1326
+ * const processor = new HTMLProcessor();
1327
+ * processor.init();
1328
+ *
1329
+ * // Parse HTML content
1330
+ * const html = '<div><p>Some text</p><img src="image.jpg"></div>';
1331
+ * const elements = await processor.parse(html);
1332
+ * ```
1333
+ *
1334
+ * The processor comes with built-in handlers for common HTML elements like links,
1335
+ * text formatting (i, u, strong), and block elements. Custom handlers can be added
1336
+ * using the `handle()` and `wrap()` methods.
1532
1337
  */
1533
- class Document {
1338
/**
 * Converts an HTML string into content elements by dispatching every parsed
 * node to the registered node handlers and decorating text results with the
 * registered wrappers. Call `init()` to register the built-in handlers.
 */
class HTMLProcessor {
    constructor() {
        // When true, sibling nodes are processed concurrently (see processChildNodes).
        this.parallelProcessing = true;
        // Registries keyed by name; Map insertion order is the dispatch order.
        this.handlers = {
            node: new Map(),
            wrap: new Map(),
        };
    }
    /**
     * Registers the built-in wrappers and node handlers.
     */
    init() {
        // Attribute values are re-emitted inside double quotes, so a literal
        // quote in the value must be escaped to keep the generated markup valid.
        const escapeAttribute = (value) => String(value).replaceAll('"', '&quot;');
        // Checked in the original priority order: right, left, center.
        // Tolerates missing whitespace and a missing trailing semicolon.
        const alignmentPatterns = ['right', 'left', 'center'].map((alignment) => [
            alignment,
            new RegExp(`text-align\\s*:\\s*${alignment}\\b`, 'i'),
        ]);
        // wrappers are used to wrap the content of nested text nodes
        // in a specific way
        this.wrap('link', (node, text) => {
            if (nodeTagIn(node, ['a'])) {
                const attributes = ['href', 'target', 'rel']
                    .map((attr) => [attr, getHTMLElementAttribute(node, attr)])
                    .filter(([_, value]) => value)
                    .map(([key, value]) => `${key}="${escapeAttribute(value)}"`)
                    .join(' ');
                return {
                    ...text,
                    content: `<a ${attributes}>${text.content}</a>`,
                };
            }
        });
        this.wrap('i', (node, text) => {
            if (nodeTagIn(node, ['i'])) {
                return {
                    ...text,
                    content: `<i>${text.content}</i>`,
                };
            }
        });
        this.wrap('u', (node, text) => {
            if (nodeTagIn(node, ['u'])) {
                return {
                    ...text,
                    content: `<u>${text.content}</u>`,
                };
            }
        });
        this.wrap('sup/sub', (node, text) => {
            if (nodeTagIn(node, ['sup', 'sub'])) {
                return {
                    ...text,
                    content: `<mark class="${node.tagName.toLowerCase()}">${text.content}</mark>`,
                };
            }
        });
        this.wrap('strong', (node, text) => {
            if (nodeTagIn(node, ['strong', 'b'])) {
                return {
                    ...text,
                    content: `<b>${text.content}</b>`,
                };
            }
        });
        this.wrap('center', (node, text) => {
            if (nodeTagIn(node, ['center'])) {
                return {
                    ...text,
                    alignment: 'center',
                };
            }
        });
        this.wrap('aligned-paragraph', (node, text) => {
            if (nodeTagIn(node, ['p'])) {
                const styleAttribute = getHTMLElementAttribute(node, 'style') || '';
                const match = alignmentPatterns.find(([, pattern]) => pattern.test(styleAttribute));
                // A paragraph without a recognised alignment is returned unchanged.
                return match ? { ...text, alignment: match[0] } : text;
            }
        });
        // handlers are used to handle specific nodes
        // and return a list of content elements
        this.handle('default', (node) => {
            const noTag = isHTMLElement(node) && !node.tagName;
            if (noTag ||
                nodeTagIn(node, [
                    'p',
                    'a',
                    'b',
                    'sup',
                    'sub',
                    'span',
                    'strong',
                    'em',
                    'i',
                    'u',
                    'section',
                    'main',
                    'div',
                    'li',
                    'center',
                ])) {
                return this.handleNested(node);
            }
        });
        this.handle('headers', (node) => {
            if (nodeTagIn(node, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
                return this.createHeader(node);
            }
        });
        this.handle('text', (node) => {
            if (isTextNode$1(node)) {
                return this.createText(node);
            }
        });
        this.handle('comment', (node) => {
            if (isCommentNode(node)) {
                return this.handleComment(node);
            }
        });
        this.handle('list', async (node) => {
            if (nodeTagIn(node, ['ul', 'ol'])) {
                // Case-insensitive check, matching the nodeTagIn guard above.
                const listType = nodeTagIs(node, 'ul') ? 'unordered' : 'ordered';
                return this.createList(node, listType);
            }
        });
        this.handle('table', (node) => {
            if (nodeTagIs(node, 'table')) {
                return this.handleTable(node);
            }
        });
        this.handle('iframe', (node) => {
            if (nodeTagIs(node, 'iframe')) {
                return this.handleIframe(node);
            }
        });
        this.handle('img', (node) => {
            if (nodeTagIs(node, 'img')) {
                return this.handleImage(node);
            }
        });
        this.handle('br', (node) => {
            if (nodeTagIs(node, 'br')) {
                return this.handleBreak(node);
            }
        });
    }
    /**
     * Registers (or replaces, with a warning) a node handler under `name`.
     *
     * @param name - Registry key
     * @param handler - Receives a node; returns an array of content elements
     *   when it handles the node, anything else to pass
     */
    handle(name, handler) {
        if (this.handlers.node.has(name)) {
            this.warn({ name }, `${name} node handler already set`);
        }
        this.handlers.node.set(name, handler);
    }
    /**
     * Registers (or replaces, with a warning) a text wrapper under `name`.
     *
     * @param name - Registry key
     * @param handler - Receives `(node, textElement)`; returns the wrapped text
     *   element, or a falsy value when it does not apply
     */
    wrap(name, handler) {
        if (this.handlers.wrap.has(name)) {
            this.warn({ name }, `${name} wrap handler already set`);
        }
        this.handlers.wrap.set(name, handler);
    }
    /**
     * Parses `html` and returns the resulting content elements with top-level
     * `divider` elements removed.
     *
     * @param html - HTML source
     * @returns Promise resolving to an array (empty when nothing was produced)
     */
    async parse(html) {
        const doc = nodeHtmlParser.parse(html, { comment: true });
        doc.removeWhitespace();
        const elements = await this.process(doc);
        const filtered = elements?.filter((e) => e.type !== 'divider');
        return filtered || [];
    }
    /**
     * Sets `additional_properties.isBlockElement` on `c` (mutating it) to true
     * when it was already true or when `parent` is a block element.
     */
    addTextAdditionalProperties(c, parent) {
        const additionalProperties = c.additional_properties || {};
        const parentNodeIsBlockElement = this.isBlockElement(parent);
        c.additional_properties = {
            ...c.additional_properties,
            isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
        };
        return c;
    }
    /**
     * Wraps text content elements with additional properties and handlers.
     * Non-text elements are passed through unchanged; for text elements the
     * first truthy wrapper result (in registration order) is used.
     *
     * @param node - The HTML node containing the text elements
     * @param elements - Content elements produced from the node's children
     **/
    wrapChildrenTextNodes(node, elements) {
        const wrapped = [];
        const wrappers = [...this.handlers.wrap.values()];
        for (const c of elements) {
            if (!isTextCE(c)) {
                wrapped.push(c);
                continue;
            }
            this.addTextAdditionalProperties(c, node);
            const handled = wrappers.map((wrapper) => wrapper(node, c)).find(Boolean);
            wrapped.push(handled || c);
        }
        return wrapped;
    }
    /**
     * Handles nested nodes by processing their children and merging text elements.
     * This method recursively processes the children of a given HTML node and
     * returns a list of content elements.
     *
     * @param node - The HTML node to process
     **/
    async handleNested(node) {
        const children = await this.processChildNodes(node);
        // Unhandled children resolve to undefined and are dropped here.
        const filtered = children.filter(Boolean).flat();
        const merged = this.mergeParagraphs(filtered);
        return this.wrapChildrenTextNodes(node, merged);
    }
    /**
     * Processes every child of `node`, concurrently when `parallelProcessing`
     * is true and one at a time otherwise. Result order follows child order.
     */
    async processChildNodes(node) {
        if (this.parallelProcessing) {
            return await Promise.all(node.childNodes.map((child) => this.process(child)));
        }
        const children = [];
        for (const child of node.childNodes) {
            children.push(await this.process(child));
        }
        return children;
    }
    /**
     * Processes a single HTML node and converts it into content elements.
     * Handlers are tried in registration order; the first one that returns an
     * array wins. A handler that throws is reported and skipped.
     *
     * @param node - The HTML node to process
     * @returns Promise resolving to an array of content elements, or undefined if node cannot be processed
     */
    async process(node) {
        for (const [name, handler] of this.handlers.node.entries()) {
            try {
                const result = await handler(node);
                // Only an array counts as "handled" (an empty array included).
                // A truthy non-array such as a string must not be spread.
                if (Array.isArray(result)) {
                    return [...result];
                }
            }
            catch (error) {
                // String() also copes with a thrown null/undefined.
                this.warn({ node: node.toString(), error: String(error), name }, 'HandlerError');
            }
        }
        this.warn({ node: node.toString() }, 'UnknownNodeError');
        return undefined;
    }
    /**
     * Merges adjacent text content elements into a single paragraph.
     * Runs of text elements not flagged `isBlockElement` are concatenated; the
     * merged element keeps the remaining properties of the last element in the run.
     *
     * @param items - The array of content elements to merge
     **/
    mergeParagraphs(items) {
        const merged = [];
        let toMerge = [];
        const merge = () => {
            if (!toMerge.length)
                return;
            const paragraph = toMerge.reduce((acc, p) => {
                return {
                    ...p,
                    content: acc.content + p.content,
                };
            }, { type: 'text', content: '' });
            merged.push(paragraph);
            toMerge = [];
        };
        for (const item of items) {
            const isBlockElement = item.additional_properties?.isBlockElement;
            if (isTextCE(item) && !isBlockElement) {
                toMerge.push(item);
            }
            else {
                merge();
                merged.push(item);
            }
        }
        merge();
        return merged;
    }
    /** Comments produce no content elements. */
    handleComment(_) {
        return [];
    }
    /** Emits the table's markup as a single raw_html element. */
    async handleTable(node) {
        return [ContentElement$1.raw_html(node.toString())];
    }
    /** Emits the iframe's markup as a single raw_html element. */
    async handleIframe(node) {
        return [ContentElement$1.raw_html(node.toString())];
    }
    /** Emits the image's markup as a single raw_html element. */
    async handleImage(node) {
        return [ContentElement$1.raw_html(node.toString())];
    }
    /** Emits a divider element for a line break. */
    async handleBreak(_) {
        return [ContentElement$1.divider()];
    }
    /** Builds a quote element from the node's nested content. */
    async createQuote(node) {
        const items = await this.handleNested(node);
        return [ContentElement$1.quote(items)];
    }
    /** Builds a text element from the node's `text`. */
    async createText(node) {
        const text = ContentElement$1.text(node.text);
        return [text];
    }
    /** Keeps only `text` and `list` elements. */
    filterListItems(items) {
        return items.filter((i) => ['text', 'list'].includes(i.type));
    }
    /** Builds a list element of the given type from the node's nested content. */
    async createList(node, type) {
        const items = await this.handleNested(node);
        return [ContentElement$1.list(type, this.filterListItems(items))];
    }
    /** Builds a header element; the level comes from the tag name, defaulting to 3. */
    async createHeader(node) {
        const level = +node.tagName.split('H')[1] || 3;
        return [ContentElement$1.header(node.innerText, level)];
    }
    /** True when `node` is an HTML element whose tag is in BLOCK_ELEMENT_TAGS$1. */
    isBlockElement(node) {
        if (!isHTMLElement(node))
            return false;
        return BLOCK_ELEMENT_TAGS$1.includes(node.tagName);
    }
    /** Reporting hook; writes to console.warn. */
    warn(metadata, message) {
        console.warn(metadata, message);
    }
}
1677
+
1678
// Frozen, prototype-less namespace object grouping the HTML constants,
// the HTMLProcessor class and the HTML utility helpers.
var index$4 = /*#__PURE__*/Object.freeze({
    __proto__: null,
    Constants: html_constants,
    HTMLProcessor,
    Utils: html_utils,
});
1684
+
1685
// XML node names that XMLProcessor.isBlockElement treats as block elements.
const BLOCK_ELEMENT_TAGS = [
    'paragraph',
    'line',
    'header',
    'ul',
    'ol',
    'li',
    'embed',
    'iframe',
    'table',
];
1686
+
1687
// Frozen, prototype-less namespace object exposing the XML constants.
var xml_constants = /*#__PURE__*/Object.freeze({
    __proto__: null,
    BLOCK_ELEMENT_TAGS,
});
1691
+
1692
/** True when `node` is non-nullish and its `type` is 'element'. */
const isXmlElement = (node) => node?.type === 'element';
1695
/** True when `node` is non-nullish and its `type` is 'text'. */
const isTextNode = (node) => node?.type === 'text';
1698
/** True when `node` is an XML element whose `name` equals `name` exactly. */
const nodeNameIs = (node, name) => {
    if (!isXmlElement(node)) {
        return false;
    }
    return node.name === name;
};
1701
/** True when `node` is an XML element whose `name` is one of `names`. */
const nodeNameIn = (node, names) => {
    if (!isXmlElement(node)) {
        return false;
    }
    return names.includes(node.name);
};
1704
+
1705
// Frozen, prototype-less namespace object exposing the XML node predicates.
var xml_utils = /*#__PURE__*/Object.freeze({
    __proto__: null,
    isTextNode,
    isXmlElement,
    nodeNameIn,
    nodeNameIs,
});
1712
+
1713
+ const ContentElement = ContentElement$1;
1714
/**
 * Converts an XML string into content elements by dispatching every node to
 * the registered node handlers and wrapping text results with the wrapper
 * registered under the parent node's name. Call `init()` to register the
 * built-in handlers.
 */
class XMLProcessor {
    constructor() {
        // Registries keyed by name; node handlers are tried in insertion order,
        // wrappers are looked up by the parent node's name.
        this.handlers = {
            node: new Map(),
            wrap: new Map(),
        };
    }
    /**
     * Registers the built-in wrappers and node handlers.
     */
    init() {
        // The URL is re-emitted inside double quotes, so a literal quote in the
        // value must be escaped to keep the generated markup valid.
        const escapeAttribute = (value) => String(value).replaceAll('"', '&quot;');
        // wrappers are used to wrap the content of nested text nodes
        // in a specific way
        this.wrap('link', (node, content) => {
            const href = node.attr.url || node.attr.href || '/';
            return `<a href="${escapeAttribute(href)}">${content}</a>`;
        });
        this.wrap('header', (_node, content) => {
            return `<h3>${content}</h3>`;
        });
        this.wrap('emphasize', (_node, content) => {
            return `<i>${content}</i>`;
        });
        this.wrap('strong', (_node, content) => {
            return `<b>${content}</b>`;
        });
        // handlers are used to handle specific nodes
        // and return a list of content elements
        this.handle('default', (node) => {
            if (nodeNameIn(node, ['section', 'paragraph', 'line', 'header', 'emphasize', 'strong', 'link', 'li'])) {
                return this.handleNested(node);
            }
        });
        this.handle('text', (node) => {
            if (isTextNode(node)) {
                return [ContentElement.text(node.text)];
            }
        });
        this.handle('list', async (node) => {
            if (nodeNameIn(node, ['ul', 'ol'])) {
                const listType = node.name === 'ul' ? 'unordered' : 'ordered';
                return this.createList(node, listType);
            }
        });
        this.handle('table', (node) => {
            if (nodeNameIs(node, 'table')) {
                return this.handleTable(node);
            }
        });
    }
    /**
     * Parses `xml` and returns the resulting content elements.
     *
     * @param xml - XML source
     * @returns Promise resolving to an array (empty when the root was not handled)
     */
    async parse(xml) {
        const doc = new xmldoc__namespace.XmlDocument(xml);
        const elements = await this.process(doc);
        return elements || [];
    }
    /**
     * Registers a node handler under `name`.
     *
     * @throws Error when a handler with that name is already registered
     */
    handle(name, handler) {
        if (this.handlers.node.has(name)) {
            throw new Error(`${name} node handler already set`);
        }
        this.handlers.node.set(name, handler);
    }
    /**
     * Registers a text wrapper under `name` (matched against the node name).
     *
     * @throws Error when a wrapper with that name is already registered
     */
    wrap(name, handler) {
        if (this.handlers.wrap.has(name)) {
            throw new Error(`${name} wrap handler already set`);
        }
        this.handlers.wrap.set(name, handler);
    }
    /**
     * Sets `additional_properties.isBlockElement` on `c` (mutating it) from its
     * existing value or, failing that, from whether `parent` is a block element.
     */
    addTextAdditionalProperties(c, parent) {
        const additionalProperties = c.additional_properties || {};
        const parentNodeIsBlockElement = this.isBlockElement(parent);
        c.additional_properties = {
            ...c.additional_properties,
            isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
        };
        return c;
    }
    /**
     * Passes non-text elements through; for text elements, applies the wrapper
     * registered under `node.name` (if any) to the element's content.
     */
    wrapChildrenTextNodes(node, elements) {
        const wrapped = [];
        for (const c of elements) {
            if (!isTextCE(c)) {
                wrapped.push(c);
                continue;
            }
            this.addTextAdditionalProperties(c, node);
            const handler = this.handlers.wrap.get(node.name);
            if (handler) {
                wrapped.push({
                    ...c,
                    content: handler(node, c.content),
                });
            }
            else {
                wrapped.push(c);
            }
        }
        return wrapped;
    }
    /**
     * Processes the node's children concurrently, drops unhandled ones, merges
     * adjacent inline text and applies the wrapper for `node`.
     */
    async handleNested(node) {
        const children = await Promise.all(node.children.map((child) => this.process(child)));
        const filtered = children.filter(Boolean).flat();
        const merged = this.mergeParagraphs(filtered);
        return this.wrapChildrenTextNodes(node, merged);
    }
    /**
     * Tries the node handlers in registration order; the first one returning an
     * array wins. A handler that throws is reported and skipped.
     *
     * @returns Promise resolving to an array of content elements, or undefined
     *   when no handler accepted the node
     */
    async process(node) {
        for (const [name, handler] of this.handlers.node.entries()) {
            try {
                const result = await handler(node);
                // An array (even an empty one) means the node was handled.
                if (Array.isArray(result)) {
                    return [...result];
                }
            }
            catch (error) {
                // String() also copes with a thrown null/undefined.
                this.warn({ node: node.toString(), error: String(error), name }, 'HandlerError');
            }
        }
        this.warn({ node: node.toString(), type: node.type }, 'UnknownNodeError');
        return undefined;
    }
    /**
     * Concatenates runs of adjacent text elements not flagged `isBlockElement`;
     * the merged element keeps the remaining properties of the last element in the run.
     */
    mergeParagraphs(items) {
        const merged = [];
        let toMerge = [];
        const merge = () => {
            if (!toMerge.length)
                return;
            const paragraph = toMerge.reduce((acc, p) => {
                return {
                    ...p,
                    content: acc.content + p.content,
                };
            }, { type: 'text', content: '' });
            merged.push(paragraph);
            toMerge = [];
        };
        for (const item of items) {
            const isBlockElement = item.additional_properties?.isBlockElement;
            if (isTextCE(item) && !isBlockElement) {
                toMerge.push(item);
            }
            else {
                merge();
                merged.push(item);
            }
        }
        merge();
        return merged;
    }
    /** Emits the table serialised with `{ html: true }` as a raw_html element. */
    async handleTable(node) {
        const html = node.toString({ html: true });
        return [ContentElement.raw_html(html)];
    }
    /** Builds a quote element from the node's nested content. */
    async createQuote(node) {
        const items = await this.handleNested(node);
        return [ContentElement.quote(items)];
    }
    /** Builds a list element of the given type from the node's nested content. */
    async createList(node, type) {
        const items = await this.handleNested(node);
        return [ContentElement.list(type, items)];
    }
    /** Returns the trimmed inner text of each child, one entry per child. */
    getNodeInnerText(node) {
        return node.children.map((n) => this.htmlFromNode(n).innerText.trim());
    }
    /** Returns the trimmed inner HTML of each child, one entry per child. */
    getNodeInnerHTML(node) {
        return node.children.map((n) => this.htmlFromNode(n).innerHTML.trim());
    }
    /** Re-parses the node's `{ html: true }` serialisation with node-html-parser. */
    htmlFromNode(node) {
        return nodeHtmlParser.parse(node.toString({ html: true }));
    }
    /** Joins the children's inner text and decodes HTML entities in the result. */
    getDecodedHTMLFromInnerNodes(node) {
        const encodedHtml = this.getNodeInnerText(node).join('');
        return decodeHTMLEntities(encodedHtml);
    }
    /**
     * Returns true when `node.name` is in BLOCK_ELEMENT_TAGS, otherwise undefined.
     * The undefined (not false) result is kept so the stored `isBlockElement`
     * property is unchanged for non-block parents.
     */
    isBlockElement(node) {
        if (BLOCK_ELEMENT_TAGS.includes(node.name))
            return true;
        return undefined;
    }
    /** Reporting hook; writes to console.warn. */
    warn(metadata, message) {
        console.warn(metadata, message);
    }
}
1899
+
1900
// Frozen, prototype-less namespace object grouping the XML constants,
// the XML utility helpers and the XMLProcessor class.
var index$3 = /*#__PURE__*/Object.freeze({
    __proto__: null,
    Constants: xml_constants,
    Utils: xml_utils,
    XMLProcessor,
});
1906
+
1907
// Frozen, prototype-less namespace object grouping the content-element
// factory with the HTML and XML namespaces.
var index$2 = /*#__PURE__*/Object.freeze({
    __proto__: null,
    ContentElement: ContentElement$1,
    HTML: index$4,
    XML: index$3,
});
1913
+
1914
+ /**
1915
+ * Base class for all arc entities, it provides common methods and properties
1916
+ * If you want to create a new entity subtype you should extend this class
1917
+ *
1918
+ * Use case: You want to migrate stories from BBC
1919
+ * You define `class BBCStory extends ArcDocument<ANS.AStory>` and implement all abstract methods
1920
+ * Then you can override the specific methods to enrich the story with the data from BBC
1921
+ *
1922
+ * To migrate it call .migrate() method
1923
+ */
1924
+ class Document {
1534
1925
  constructor() {
1535
1926
  this.ans = null;
1536
1927
  this.circulations = [];
@@ -1768,480 +2159,314 @@ class Story extends Document {
1768
2159
  }
1769
2160
  }
1770
2161
 
1771
- var index$2 = /*#__PURE__*/Object.freeze({
2162
+ var index$1 = /*#__PURE__*/Object.freeze({
1772
2163
  __proto__: null,
1773
2164
  Document: Document,
1774
2165
  Story: Story
1775
2166
  });
1776
2167
 
1777
- const BLOCK_ELEMENT_TAGS = [
1778
- 'ADDRESS',
1779
- 'ARTICLE',
1780
- 'ASIDE',
1781
- 'BLOCKQUOTE',
1782
- 'DETAILS',
1783
- 'DIV',
1784
- 'DL',
1785
- 'FIELDSET',
1786
- 'FIGCAPTION',
1787
- 'FIGURE',
1788
- 'FOOTER',
1789
- 'FORM',
1790
- 'H1',
1791
- 'H2',
1792
- 'H3',
1793
- 'H4',
1794
- 'H5',
1795
- 'H6',
1796
- 'HEADER',
1797
- 'HR',
1798
- 'LINE',
1799
- 'MAIN',
1800
- 'MENU',
1801
- 'NAV',
1802
- 'OL',
1803
- 'P',
1804
- 'PARAGRAPH',
1805
- 'PRE',
1806
- 'SECTION',
1807
- 'TABLE',
1808
- 'UL',
1809
- 'LI',
1810
- 'BODY',
1811
- 'HTML',
1812
- ];
2168
// String enum: member name -> lower-case string value.
var ANSType;
(function (members) {
    Object.assign(members, {
        Story: 'story',
        Video: 'video',
        Tag: 'tag',
        Author: 'author',
        Gallery: 'gallery',
        Image: 'image',
        Redirect: 'redirect',
    });
})(ANSType || (ANSType = {}));
2178
// String enum in which every member's value equals its own name.
var MigrationStatus;
(function (members) {
    const names = [
        'Success',
        'Queued',
        'Circulated',
        'Published',
        'Scheduled',
        'FailVideo',
        'FailImage',
        'FailPhoto',
        'FailStory',
        'FailGallery',
        'FailAuthor',
        'FailTag',
        'ValidationFailed',
    ];
    for (const name of names) {
        members[name] = name;
    }
})(MigrationStatus || (MigrationStatus = {}));
2194
// String enum: member name -> camelCase string value.
var SummarySortBy;
(function (members) {
    Object.assign(members, {
        CreateDate: 'createDate',
        UpdateDate: 'updateDate',
        Id: 'id',
    });
})(SummarySortBy || (SummarySortBy = {}));
2200
// String enum in which every member's value equals its own name.
var SummarySortOrder;
(function (members) {
    Object.assign(members, { ASC: 'ASC', DESC: 'DESC' });
})(SummarySortOrder || (SummarySortOrder = {}));
1813
2205
 
1814
- var html_constants = /*#__PURE__*/Object.freeze({
2206
+ /* eslint-disable */
2207
+ /**
2208
+ * This file was automatically generated by json-schema-to-typescript.
2209
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
2210
+ * and run json-schema-to-typescript to regenerate this file.
2211
+ */
2212
+
2213
// Empty, frozen, prototype-less namespace object (no runtime members).
var ansTypes = /*#__PURE__*/Object.freeze(Object.create(null));
2216
+
2217
// Empty, frozen, prototype-less namespace object (no runtime members).
var utils = /*#__PURE__*/Object.freeze(Object.create(null));
2220
+
2221
+ var index = /*#__PURE__*/Object.freeze({
1815
2222
  __proto__: null,
1816
- BLOCK_ELEMENT_TAGS: BLOCK_ELEMENT_TAGS
2223
+ ANS: ansTypes,
2224
+ get ANSType () { return ANSType; },
2225
+ get MigrationStatus () { return MigrationStatus; },
2226
+ get SummarySortBy () { return SummarySortBy; },
2227
+ get SummarySortOrder () { return SummarySortOrder; },
2228
+ TypeUtils: utils
1817
2229
  });
1818
2230
 
1819
- const isTextNode = (node) => {
1820
- return node instanceof nodeHtmlParser.TextNode;
1821
- };
1822
- const isHTMLElement = (node) => {
1823
- return node instanceof nodeHtmlParser.HTMLElement;
1824
- };
1825
- const isCommentNode = (node) => {
1826
- return node instanceof nodeHtmlParser.CommentNode;
1827
- };
1828
- const nodeTagIs = (node, name) => {
1829
- return isHTMLElement(node) && node.tagName?.toLowerCase() === name.toLowerCase();
1830
- };
1831
- const nodeTagIn = (node, names) => {
1832
- return isHTMLElement(node) && names.includes(node.tagName?.toLowerCase());
1833
- };
1834
- const isTextCE = (ce) => {
1835
- return ce?.type === 'text';
1836
- };
1837
- const decodeHTMLEntities = (str) => htmlEntities.decode(str);
1838
- const htmlToText = (html, parseOptions) => {
1839
- if (!html)
1840
- return '';
1841
- const doc = nodeHtmlParser.parse(html, parseOptions);
1842
- return decodeHTMLEntities(doc.innerText);
1843
- };
1844
- const getHTMLElementAttribute = (e, key) => {
1845
- const value = e.getAttribute(key);
1846
- if (value)
1847
- return value;
1848
- return new URLSearchParams(e.rawAttrs.replaceAll(' ', '&')).get(key);
2231
/**
 * Builds a reference object for `ref`.
 *
 * @param ref - Object with at least an `id`
 * @returns `{ _id, type: 'reference', referent }`, where `referent` is a
 *   shallow copy of `ref`
 */
const reference = (ref) => ({
    _id: ref.id,
    type: 'reference',
    referent: { ...ref },
});
1850
2240
 
1851
- var html_utils = /*#__PURE__*/Object.freeze({
2241
+ var ANS = /*#__PURE__*/Object.freeze({
1852
2242
  __proto__: null,
1853
- decodeHTMLEntities: decodeHTMLEntities,
1854
- getHTMLElementAttribute: getHTMLElementAttribute,
1855
- htmlToText: htmlToText,
1856
- isCommentNode: isCommentNode,
1857
- isHTMLElement: isHTMLElement,
1858
- isTextCE: isTextCE,
1859
- isTextNode: isTextNode,
1860
- nodeTagIn: nodeTagIn,
1861
- nodeTagIs: nodeTagIs
2243
+ reference: reference
1862
2244
  });
1863
2245
 
2246
/**
 * Derives an id string from `identifier` within an organisation namespace.
 *
 * @param identifier - Source identifier to hash
 * @param orgHostname - Value hashed under the uuid v5 DNS namespace to obtain
 *   the namespace UUID
 * @returns The RFC4648 base32 encoding (without padding) of the 16 UUID bytes
 */
const generateArcId = (identifier, orgHostname) => {
    // Namespace UUID derived from the organisation value.
    const orgNamespace = uuid.v5(orgHostname, uuid.v5.DNS);
    // Passing a buffer makes uuid.v5 write the raw bytes instead of returning a string.
    const idBytes = uuid.v5(identifier, orgNamespace, Buffer.alloc(16));
    return encode(idBytes, 'RFC4648', { padding: false });
};
1864
2251
  /**
1865
- * HTMLProcessor is responsible for parsing HTML content into structured content elements.
1866
- * It provides a flexible way to handle different HTML nodes and wrap text content.
1867
- *
1868
- * The processor can be extended with custom handlers for specific node types and
1869
- * wrappers for text content.
2252
+ * Utility class for generating Arc IDs and source IDs
1870
2253
  *
1871
2254
  * @example
1872
2255
  * ```ts
1873
- * // Create and initialize processor
1874
- * const processor = new HTMLProcessor();
1875
- * processor.init();
1876
- *
1877
- * // Parse HTML content
1878
- * const html = '<div><p>Some text</p><img src="image.jpg"></div>';
1879
- * const elements = await processor.parse(html);
2256
+ * const generator = new IdGenerator(['my-org']);
2257
+ * const arcId = generator.getArcId('123'); // Generates a unique for 'my-org' Arc ID
2258
+ * const sourceId = generator.getSourceId('123', ['my-site']); // Generates 'my-site-123'
1880
2259
  * ```
1881
- *
1882
- * The processor comes with built-in handlers for common HTML elements like links,
1883
- * text formatting (i, u, strong), and block elements. Custom handlers can be added
1884
- * using the `handle()` and `wrap()` methods.
1885
2260
  */
1886
- class HTMLProcessor {
1887
- constructor() {
1888
- this.parallelProcessing = true;
1889
- this.handlers = {
1890
- node: new Map(),
1891
- wrap: new Map(),
1892
- };
1893
- }
1894
- init() {
1895
- // wrappers are used to wrap the content of nested text nodes
1896
- // in a specific way
1897
- this.wrap('link', (node, text) => {
1898
- if (nodeTagIn(node, ['a'])) {
1899
- const attributes = ['href', 'target', 'rel']
1900
- .map((attr) => [attr, getHTMLElementAttribute(node, attr)])
1901
- .filter(([_, value]) => value)
1902
- .map(([key, value]) => `${key}="${value}"`)
1903
- .join(' ');
1904
- return {
1905
- ...text,
1906
- content: `<a ${attributes}>${text.content}</a>`,
1907
- };
1908
- }
1909
- });
1910
- this.wrap('i', (node, text) => {
1911
- if (nodeTagIn(node, ['i'])) {
1912
- return {
1913
- ...text,
1914
- content: `<i>${text.content}</i>`,
1915
- };
1916
- }
1917
- });
1918
- this.wrap('u', (node, text) => {
1919
- if (nodeTagIn(node, ['u'])) {
1920
- return {
1921
- ...text,
1922
- content: `<u>${text.content}</u>`,
1923
- };
1924
- }
1925
- });
1926
- this.wrap('sup/sub', (node, text) => {
1927
- if (nodeTagIn(node, ['sup', 'sub'])) {
1928
- return {
1929
- ...text,
1930
- content: `<mark class="${node.tagName.toLowerCase()}">${text.content}</mark>`,
1931
- };
1932
- }
1933
- });
1934
- this.wrap('strong', (node, text) => {
1935
- if (nodeTagIn(node, ['strong', 'b'])) {
1936
- return {
1937
- ...text,
1938
- content: `<b>${text.content}</b>`,
1939
- };
1940
- }
1941
- });
1942
- this.wrap('center', (node, text) => {
1943
- if (nodeTagIn(node, ['center'])) {
1944
- return {
1945
- ...text,
1946
- alignment: 'center',
1947
- };
1948
- }
1949
- });
1950
- this.wrap('aligned-paragraph', (node, text) => {
1951
- if (nodeTagIn(node, ['p'])) {
1952
- const styleAttribute = getHTMLElementAttribute(node, 'style') || '';
1953
- if (!styleAttribute)
1954
- return text;
1955
- if (styleAttribute.includes('text-align: right;')) {
1956
- return {
1957
- ...text,
1958
- alignment: 'right',
1959
- };
1960
- }
1961
- if (styleAttribute.includes('text-align: left;')) {
1962
- return {
1963
- ...text,
1964
- alignment: 'left',
1965
- };
1966
- }
1967
- if (styleAttribute.includes('text-align: center;')) {
1968
- return {
1969
- ...text,
1970
- alignment: 'center',
1971
- };
1972
- }
1973
- return text;
1974
- }
1975
- });
1976
- // handlers are used to handle specific nodes
1977
- // and return a list of content elements
1978
- this.handle('default', (node) => {
1979
- const noTag = isHTMLElement(node) && !node.tagName;
1980
- if (noTag ||
1981
- nodeTagIn(node, [
1982
- 'p',
1983
- 'a',
1984
- 'b',
1985
- 'sup',
1986
- 'sub',
1987
- 'span',
1988
- 'strong',
1989
- 'em',
1990
- 'i',
1991
- 'u',
1992
- 'section',
1993
- 'main',
1994
- 'div',
1995
- 'li',
1996
- 'center',
1997
- ])) {
1998
- return this.handleNested(node);
1999
- }
2000
- });
2001
- this.handle('headers', (node) => {
2002
- if (nodeTagIn(node, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
2003
- return this.createHeader(node);
2004
- }
2005
- });
2006
- this.handle('text', (node) => {
2007
- if (isTextNode(node)) {
2008
- return this.createText(node);
2009
- }
2010
- });
2011
- this.handle('comment', (node) => {
2012
- if (isCommentNode(node)) {
2013
- return this.handleComment(node);
2014
- }
2015
- });
2016
- this.handle('list', async (node) => {
2017
- if (nodeTagIn(node, ['ul', 'ol'])) {
2018
- const listType = node.tagName === 'UL' ? 'unordered' : 'ordered';
2019
- return this.createList(node, listType);
2020
- }
2021
- });
2022
- this.handle('table', (node) => {
2023
- if (nodeTagIs(node, 'table')) {
2024
- return this.handleTable(node);
2025
- }
2026
- });
2027
- this.handle('iframe', (node) => {
2028
- if (nodeTagIs(node, 'iframe')) {
2029
- return this.handleIframe(node);
2030
- }
2031
- });
2032
- this.handle('img', (node) => {
2033
- if (nodeTagIs(node, 'img')) {
2034
- return this.handleImage(node);
2035
- }
2036
- });
2037
- this.handle('br', (node) => {
2038
- if (nodeTagIs(node, 'br')) {
2039
- return this.handleBreak(node);
2040
- }
2041
- });
2042
- }
2043
- handle(name, handler) {
2044
- if (this.handlers.node.has(name)) {
2045
- this.warn({ name }, `${name} node handler already set`);
2046
- }
2047
- this.handlers.node.set(name, handler);
2048
- }
2049
- wrap(name, handler) {
2050
- if (this.handlers.wrap.has(name)) {
2051
- this.warn({ name }, `${name} wrap handler already set`);
2261
+ class IdGenerator {
2262
+ constructor(namespaces) {
2263
+ if (!namespaces.length) {
2264
+ throw new Error('At least 1 namespace is required');
2052
2265
  }
2053
- this.handlers.wrap.set(name, handler);
2266
+ this.namespace = namespaces.join('-');
2054
2267
  }
2055
- async parse(html) {
2056
- const doc = nodeHtmlParser.parse(html, { comment: true });
2057
- doc.removeWhitespace();
2058
- const elements = await this.process(doc);
2059
- const filtered = elements?.filter((e) => e.type !== 'divider');
2060
- return filtered || [];
2268
+ getArcId(id) {
2269
+ return generateArcId(id.toString(), this.namespace);
2061
2270
  }
2062
- addTextAdditionalProperties(c, parent) {
2063
- const additionalProperties = c.additional_properties || {};
2064
- const parentNodeIsBlockElement = this.isBlockElement(parent);
2065
- c.additional_properties = {
2066
- ...c.additional_properties,
2067
- isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
2068
- };
2069
- return c;
2271
+ getSourceId(id, prefixes = []) {
2272
+ return [...prefixes, id].join('-');
2070
2273
  }
2071
- /**
2072
- * Wraps text content elements with additional properties and handlers.
2073
- * This method iterates through an array of content elements and applies
2074
- * wrappers to text elements.
2075
- *
2076
- * @param node - The HTML node containing the text elements
2077
- **/
2078
- wrapChildrenTextNodes(node, elements) {
2079
- const wrapped = [];
2080
- const wrappers = [...this.handlers.wrap.values()];
2081
- for (const c of elements) {
2082
- if (!isTextCE(c)) {
2083
- wrapped.push(c);
2084
- continue;
2085
- }
2086
- this.addTextAdditionalProperties(c, node);
2087
- const handled = wrappers.map((wrapper) => wrapper(node, c)).find(Boolean);
2088
- wrapped.push(handled || c);
2274
+ }
2275
+
2276
+ var Id = /*#__PURE__*/Object.freeze({
2277
+ __proto__: null,
2278
+ IdGenerator: IdGenerator,
2279
+ generateArcId: generateArcId
2280
+ });
2281
+
2282
+ const buildTree = (items) => {
2283
+ const tree = [
2284
+ {
2285
+ id: '/',
2286
+ children: [],
2287
+ meta: new Proxy({}, {
2288
+ get: () => {
2289
+ throw new Error('Root node meta is not accessible');
2290
+ },
2291
+ }),
2292
+ parent: null,
2293
+ },
2294
+ ];
2295
+ // Track nodes at each level to maintain parent-child relationships
2296
+ // stores last node at each level
2297
+ const currLevelNodes = {
2298
+ 0: tree[0],
2299
+ };
2300
+ for (const item of items) {
2301
+ const node = {
2302
+ id: item.id,
2303
+ parent: null,
2304
+ children: [],
2305
+ meta: item,
2306
+ };
2307
+ // Determine the level of this node
2308
+ const levelKey = Object.keys(item).find((key) => key.startsWith('N') && item[key]);
2309
+ const level = Number(levelKey?.replace('N', '')) || 0;
2310
+ if (!level) {
2311
+ throw new Error(`Invalid level for section ${item.id}`);
2089
2312
  }
2090
- return wrapped;
2313
+ // This is a child node - attach to its parent
2314
+ const parentLevel = level - 1;
2315
+ const parentNode = currLevelNodes[parentLevel];
2316
+ if (parentNode) {
2317
+ node.parent = parentNode;
2318
+ parentNode.children.push(node);
2319
+ }
2320
+ else {
2321
+ throw new Error(`Parent node not found for section ${item.id}`);
2322
+ }
2323
+ // Set this as the current node for its level
2324
+ currLevelNodes[level] = node;
2091
2325
  }
2092
- /**
2093
- * Handles nested nodes by processing their children and merging text elements.
2094
- * This method recursively processes the children of a given HTML node and
2095
- * returns a list of content elements.
2096
- *
2097
- * @param node - The HTML node to process
2098
- **/
2099
- async handleNested(node) {
2100
- const children = await this.processChildNodes(node);
2101
- const filtered = children.filter(Boolean).flat();
2102
- const merged = this.mergeParagraphs(filtered);
2103
- const wrapped = this.wrapChildrenTextNodes(node, merged);
2104
- return wrapped;
2326
+ // return root nodes children
2327
+ return tree[0].children;
2328
+ };
2329
+ const flattenTree = (tree) => {
2330
+ const flatten = [];
2331
+ const traverse = (node) => {
2332
+ flatten.push(node);
2333
+ for (const child of node.children) {
2334
+ traverse(child);
2335
+ }
2336
+ };
2337
+ // traverse all root nodes and their children
2338
+ for (const node of tree) {
2339
+ traverse(node);
2105
2340
  }
2106
- async processChildNodes(node) {
2107
- if (this.parallelProcessing) {
2108
- return await Promise.all(node.childNodes.map((child) => this.process(child)));
2341
+ return flatten;
2342
+ };
2343
+ const buildAndFlattenTree = (items) => flattenTree(buildTree(items));
2344
+ const groupByWebsites = (sections) => {
2345
+ return sections.reduce((acc, section) => {
2346
+ const website = section._website;
2347
+ if (!acc[website])
2348
+ acc[website] = [];
2349
+ acc[website].push(section);
2350
+ return acc;
2351
+ }, {});
2352
+ };
2353
+ const references = (sections) => {
2354
+ return sections.map((s) => reference({
2355
+ id: s._id,
2356
+ website: s._website,
2357
+ type: 'section',
2358
+ }));
2359
+ };
2360
+ const isReference = (section) => {
2361
+ return section?.type === 'reference' && section?.referent?.type === 'section';
2362
+ };
2363
+ const removeDuplicates = (sections) => {
2364
+ const map = new Map();
2365
+ sections.forEach((s) => {
2366
+ if (isReference(s)) {
2367
+ map.set(`${s.referent.id}${s.referent.website}`, s);
2109
2368
  }
2110
- const children = [];
2111
- for (const child of node.childNodes) {
2112
- children.push(await this.process(child));
2369
+ else {
2370
+ map.set(`${s._id}${s._website}`, s);
2113
2371
  }
2114
- return children;
2372
+ });
2373
+ return [...map.values()];
2374
+ };
2375
+ class SectionsRepository {
2376
+ constructor(arc) {
2377
+ this.arc = arc;
2378
+ this.sectionsByWebsite = {};
2379
+ this.websitesAreLoaded = false;
2115
2380
  }
2116
- /**
2117
- * Processes a single HTML node and converts it into content elements.
2118
- * This method iterates through registered node handlers and attempts to process the node.
2119
- * If a handler successfully processes the node, it returns an array of content elements.
2120
- *
2121
- * @param node - The HTML node to process
2122
- * @returns Promise resolving to an array of content elements, or undefined if node cannot be processed
2123
- */
2124
- async process(node) {
2125
- let isKnownNode = false;
2126
- const elements = [];
2127
- for (const [name, handler] of this.handlers.node.entries()) {
2128
- try {
2129
- const result = await handler(node);
2130
- if (result) {
2131
- // if handler returns an array of elements, it means that the node was handled properly, even if there is no elements inside
2132
- isKnownNode = true;
2133
- elements.push(...result);
2134
- break;
2135
- }
2136
- }
2137
- catch (error) {
2138
- this.warn({ node: node.toString(), error: error.toString(), name }, 'HandlerError');
2139
- }
2140
- }
2141
- if (isKnownNode)
2142
- return elements;
2143
- this.warn({ node: node.toString() }, 'UnknownNodeError');
2381
+ async put(ans) {
2382
+ await this.arc.Site.putSection(ans);
2383
+ const created = await this.arc.Site.getSection(ans._id, ans.website);
2384
+ this.save(created);
2144
2385
  }
2145
- /**
2146
- * Merges adjacent text content elements into a single paragraph.
2147
- * This method iterates through an array of content elements and combines
2148
- * adjacent text elements into a single paragraph.
2149
- *
2150
- * @param items - The array of content elements to merge
2151
- **/
2152
- mergeParagraphs(items) {
2153
- const merged = [];
2154
- let toMerge = [];
2155
- const merge = () => {
2156
- if (!toMerge.length)
2157
- return;
2158
- const paragraph = toMerge.reduce((acc, p) => {
2159
- return {
2160
- ...p,
2161
- content: acc.content + p.content,
2162
- };
2163
- }, { type: 'text', content: '' });
2164
- merged.push(paragraph);
2165
- toMerge = [];
2166
- };
2167
- for (let i = 0; i < items.length; i++) {
2168
- const item = items[i];
2169
- const isBlockElement = item.additional_properties?.isBlockElement;
2170
- if (isTextCE(item) && !isBlockElement) {
2171
- toMerge.push(item);
2386
+ async loadWebsite(website) {
2387
+ const sections = [];
2388
+ let next = true;
2389
+ let offset = 0;
2390
+ while (next) {
2391
+ const migrated = await this.arc.Site.getSections({ website, offset }).catch((_) => {
2392
+ return { q_results: [] };
2393
+ });
2394
+ if (migrated.q_results.length) {
2395
+ sections.push(...migrated.q_results);
2396
+ offset += migrated.q_results.length;
2172
2397
  }
2173
2398
  else {
2174
- merge();
2175
- merged.push(item);
2399
+ next = false;
2176
2400
  }
2177
2401
  }
2178
- merge();
2179
- return merged;
2180
- }
2181
- handleComment(_) {
2182
- return [];
2183
- }
2184
- async handleTable(node) {
2185
- return [ContentElement.raw_html(node.toString())];
2186
- }
2187
- async handleIframe(node) {
2188
- return [ContentElement.raw_html(node.toString())];
2189
- }
2190
- async handleImage(node) {
2191
- return [ContentElement.raw_html(node.toString())];
2192
- }
2193
- async handleBreak(_) {
2194
- return [ContentElement.divider()];
2195
- }
2196
- async createQuote(node) {
2197
- const items = await this.handleNested(node);
2198
- return [ContentElement.quote(items)];
2402
+ return sections;
2199
2403
  }
2200
- async createText(node) {
2201
- const text = ContentElement.text(node.text);
2202
- return [text];
2404
+ async loadWebsites(websites) {
2405
+ for (const website of websites) {
2406
+ this.sectionsByWebsite[website] = await this.loadWebsite(website);
2407
+ }
2408
+ this.websitesAreLoaded = true;
2203
2409
  }
2204
- filterListItems(items) {
2205
- return items.filter((i) => ['text', 'list'].includes(i.type));
2410
+ save(section) {
2411
+ const website = section._website;
2412
+ assert.ok(website, 'Section must have a website');
2413
+ this.sectionsByWebsite[website] = this.sectionsByWebsite[website] || [];
2414
+ if (!this.sectionsByWebsite[website].find((s) => s._id === section._id)) {
2415
+ this.sectionsByWebsite[website].push(section);
2416
+ }
2206
2417
  }
2207
- async createList(node, type) {
2208
- const items = await this.handleNested(node);
2209
- return [ContentElement.list(type, this.filterListItems(items))];
2418
+ getById(id, website) {
2419
+ this.ensureWebsitesLoaded();
2420
+ const section = this.sectionsByWebsite[website]?.find((s) => s._id === id);
2421
+ return section;
2210
2422
  }
2211
- async createHeader(node) {
2212
- const level = +node.tagName.split('H')[1] || 3;
2213
- return [ContentElement.header(node.innerText, level)];
2423
+ getByWebsite(website) {
2424
+ this.ensureWebsitesLoaded();
2425
+ return this.sectionsByWebsite[website];
2214
2426
  }
2215
- isBlockElement(node) {
2216
- if (!isHTMLElement(node))
2217
- return false;
2218
- const defaultBlockElements = new Set(BLOCK_ELEMENT_TAGS);
2219
- return defaultBlockElements.has(node.tagName);
2427
+ getParentSections(section) {
2428
+ this.ensureWebsitesLoaded();
2429
+ const parents = [];
2430
+ let current = section;
2431
+ while (current.parent?.default && current.parent.default !== '/') {
2432
+ const parent = this.getById(current.parent.default, section._website);
2433
+ if (!parent)
2434
+ break;
2435
+ parents.push(parent);
2436
+ current = parent;
2437
+ }
2438
+ return parents;
2220
2439
  }
2221
- warn(metadata, message) {
2222
- console.warn(metadata, message);
2440
+ ensureWebsitesLoaded() {
2441
+ assert.ok(this.websitesAreLoaded, 'call .loadWebsites() first');
2223
2442
  }
2224
2443
  }
2225
2444
 
2226
- var index$1 = /*#__PURE__*/Object.freeze({
2445
+ var Section = /*#__PURE__*/Object.freeze({
2227
2446
  __proto__: null,
2228
- Constants: html_constants,
2229
- HTMLProcessor: HTMLProcessor,
2230
- Utils: html_utils
2447
+ SectionsRepository: SectionsRepository,
2448
+ buildAndFlattenTree: buildAndFlattenTree,
2449
+ buildTree: buildTree,
2450
+ flattenTree: flattenTree,
2451
+ groupByWebsites: groupByWebsites,
2452
+ isReference: isReference,
2453
+ references: references,
2454
+ removeDuplicates: removeDuplicates
2231
2455
  });
2232
2456
 
2233
- var index = /*#__PURE__*/Object.freeze({
2234
- __proto__: null,
2235
- ContentElement: ContentElement,
2236
- HTML: index$1
2237
- });
2457
+ const ArcUtils = {
2458
+ Id,
2459
+ ANS,
2460
+ ContentElements,
2461
+ Section,
2462
+ };
2238
2463
 
2239
- exports.AnsMapper = index$2;
2464
+ exports.AnsMapper = index$1;
2240
2465
  exports.ArcAPI = ArcAPI;
2241
2466
  exports.ArcError = ArcError;
2242
- exports.ArcTypes = index$3;
2467
+ exports.ArcTypes = index;
2243
2468
  exports.ArcUtils = ArcUtils;
2244
- exports.ContentElements = index;
2469
+ exports.ContentElements = index$2;
2245
2470
  exports.WsClient = WsClient;
2246
2471
  exports.default = ArcAPI;
2247
2472
  //# sourceMappingURL=index.cjs.map