@code.store/arcxp-sdk-ts 5.2.0 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,11 +5,11 @@ import fs from 'node:fs';
5
5
  import path from 'node:path';
6
6
  import FormData from 'form-data';
7
7
  import * as ws from 'ws';
8
+ import { TextNode, HTMLElement, CommentNode, parse } from 'node-html-parser';
9
+ import { decode } from 'html-entities';
8
10
  import encode from 'base32-encode';
9
11
  import { v5 } from 'uuid';
10
12
  import assert from 'node:assert';
11
- import { TextNode, HTMLElement, CommentNode, parse } from 'node-html-parser';
12
- import { decode } from 'html-entities';
13
13
 
14
14
  const safeJSONStringify = (data) => {
15
15
  try {
@@ -113,28 +113,6 @@ class ArcAuthor extends ArcAbstractAPI {
113
113
  }
114
114
  }
115
115
 
116
- class ArcContentOps extends ArcAbstractAPI {
117
- constructor(options) {
118
- super({ ...options, apiPath: 'contentops/v1' });
119
- }
120
- async schedulePublish(payload) {
121
- const { data } = await this.client.put('/publish', payload);
122
- return data;
123
- }
124
- async scheduleUnpublish(payload) {
125
- const { data } = await this.client.put('/unpublish', payload);
126
- return data;
127
- }
128
- async unscheduleUnpublish(payload) {
129
- const { data } = await this.client.put('/unschedule_unpublish', payload);
130
- return data;
131
- }
132
- async unschedulePublish(payload) {
133
- const { data } = await this.client.put('/unschedule_publish', payload);
134
- return data;
135
- }
136
- }
137
-
138
116
  class ArcContent extends ArcAbstractAPI {
139
117
  constructor(options) {
140
118
  super({ ...options, apiPath: 'content/v4' });
@@ -159,6 +137,28 @@ class ArcContent extends ArcAbstractAPI {
159
137
  }
160
138
  }
161
139
 
140
+ class ArcContentOps extends ArcAbstractAPI {
141
+ constructor(options) {
142
+ super({ ...options, apiPath: 'contentops/v1' });
143
+ }
144
+ async schedulePublish(payload) {
145
+ const { data } = await this.client.put('/publish', payload);
146
+ return data;
147
+ }
148
+ async scheduleUnpublish(payload) {
149
+ const { data } = await this.client.put('/unpublish', payload);
150
+ return data;
151
+ }
152
+ async unscheduleUnpublish(payload) {
153
+ const { data } = await this.client.put('/unschedule_unpublish', payload);
154
+ return data;
155
+ }
156
+ async unschedulePublish(payload) {
157
+ const { data } = await this.client.put('/unschedule_publish', payload);
158
+ return data;
159
+ }
160
+ }
161
+
162
162
  class Custom extends ArcAbstractAPI {
163
163
  constructor(options) {
164
164
  super({ ...options, apiPath: '' });
@@ -172,6 +172,128 @@ class Custom extends ArcAbstractAPI {
172
172
  }
173
173
  }
174
174
 
175
+ class ArcDeveloperRetail extends ArcAbstractAPI {
176
+ constructor(options) {
177
+ super({ ...options, apiPath: 'retail/api/v1' });
178
+ }
179
+ // ============================================
180
+ // Product Methods
181
+ // ============================================
182
+ async getProductById(id, params) {
183
+ const { data } = await this.client.get(`/product/${id}`, { params });
184
+ return data;
185
+ }
186
+ async getProductBySku(sku, params) {
187
+ const { data } = await this.client.get(`/product/sku/${sku}`, { params });
188
+ return data;
189
+ }
190
+ async getProductByPriceCode(priceCode, params) {
191
+ const { data } = await this.client.get(`/product/pricecode/${priceCode}`, { params });
192
+ return data;
193
+ }
194
+ async getAllProducts(params) {
195
+ const { data } = await this.client.get('/product', { params });
196
+ return data;
197
+ }
198
+ // ============================================
199
+ // Pricing Strategy Methods
200
+ // ============================================
201
+ async getPricingStrategyById(id, params) {
202
+ const { data } = await this.client.get(`/pricing/strategy/${id}`, { params });
203
+ return data;
204
+ }
205
+ async getAllPricingStrategies(params) {
206
+ const { data } = await this.client.get('/pricing/strategy', { params });
207
+ return data;
208
+ }
209
+ // ============================================
210
+ // Pricing Rate Methods
211
+ // ============================================
212
+ async getPricingRateById(id, params) {
213
+ const { data } = await this.client.get(`/pricing/rate/${id}`, { params });
214
+ return data;
215
+ }
216
+ async getAllPricingRates(params) {
217
+ const { data } = await this.client.get('/pricing/rate', { params });
218
+ return data;
219
+ }
220
+ // ============================================
221
+ // Pricing Cycle Methods
222
+ // ============================================
223
+ async getPricingCycle(priceCode, cycleIndex, startDate, params) {
224
+ const { data } = await this.client.get(`/pricing/cycle/${priceCode}/${cycleIndex}/${startDate}`, {
225
+ params,
226
+ });
227
+ return data;
228
+ }
229
+ // ============================================
230
+ // Campaign Methods
231
+ // ============================================
232
+ async getCampaignById(id, params) {
233
+ const { data } = await this.client.get(`/campaign/${id}`, { params });
234
+ return data;
235
+ }
236
+ async getCampaignByName(campaignName, params) {
237
+ const { data } = await this.client.get(`/campaign/${campaignName}/get`, { params });
238
+ return data;
239
+ }
240
+ async getAllCampaigns(params) {
241
+ const { data } = await this.client.get('/campaign', { params });
242
+ return data;
243
+ }
244
+ // ============================================
245
+ // Campaign Category Methods
246
+ // ============================================
247
+ async getCampaignCategoryById(id, params) {
248
+ const { data } = await this.client.get(`/campaign/category/${id}`, { params });
249
+ return data;
250
+ }
251
+ async getAllCampaignCategories(params) {
252
+ const { data } = await this.client.get('/campaign/category', { params });
253
+ return data;
254
+ }
255
+ // ============================================
256
+ // Offer Methods
257
+ // ============================================
258
+ async getOfferById(id, params) {
259
+ const { data } = await this.client.get(`/offer/${id}`, { params });
260
+ return data;
261
+ }
262
+ async getAllOffers(params) {
263
+ const { data } = await this.client.get('/offer', { params });
264
+ return data;
265
+ }
266
+ // ============================================
267
+ // Offer Attribute Methods
268
+ // ============================================
269
+ async getOfferAttributeById(id, params) {
270
+ const { data } = await this.client.get(`/offer/attribute/${id}`, { params });
271
+ return data;
272
+ }
273
+ async getAllOfferAttributes(params) {
274
+ const { data } = await this.client.get('/offer/attribute', { params });
275
+ return data;
276
+ }
277
+ // ============================================
278
+ // Product Attribute Methods
279
+ // ============================================
280
+ async getProductAttributeById(id, params) {
281
+ const { data } = await this.client.get(`/product/attribute/${id}`, { params });
282
+ return data;
283
+ }
284
+ async getAllProductAttributes(params) {
285
+ const { data } = await this.client.get('/product/attribute', { params });
286
+ return data;
287
+ }
288
+ // ============================================
289
+ // Condition Category Methods
290
+ // ============================================
291
+ async getAllConditionCategories(params) {
292
+ const { data } = await this.client.get('/condition/categories', { params });
293
+ return data;
294
+ }
295
+ }
296
+
175
297
  class ArcDraft extends ArcAbstractAPI {
176
298
  constructor(options) {
177
299
  super({ ...options, apiPath: 'draft/v1' });
@@ -568,190 +690,68 @@ class ArcRetailEvents {
568
690
  }
569
691
  }
570
692
 
571
- class ArcDeveloperRetail extends ArcAbstractAPI {
693
+ class ArcSales extends ArcAbstractAPI {
572
694
  constructor(options) {
573
- super({ ...options, apiPath: 'retail/api/v1' });
695
+ super({ ...options, apiPath: 'sales/api/v1' });
574
696
  }
575
- // ============================================
576
- // Product Methods
577
- // ============================================
578
- async getProductById(id, params) {
579
- const { data } = await this.client.get(`/product/${id}`, { params });
697
+ async migrate(params, payload) {
698
+ const FormData = await platform.form_data();
699
+ const form = new FormData();
700
+ form.append('file', JSON.stringify(payload), { filename: 'subs.json', contentType: 'application/json' });
701
+ const { data } = await this.client.post('/migrate', form, {
702
+ params,
703
+ headers: {
704
+ ...form.getHeaders(),
705
+ },
706
+ });
580
707
  return data;
581
708
  }
582
- async getProductBySku(sku, params) {
583
- const { data } = await this.client.get(`/product/sku/${sku}`, { params });
584
- return data;
709
+ }
710
+ class ArcSalesV2 extends ArcAbstractAPI {
711
+ constructor(options) {
712
+ super({ ...options, apiPath: 'sales/api/v2' });
585
713
  }
586
- async getProductByPriceCode(priceCode, params) {
587
- const { data } = await this.client.get(`/product/pricecode/${priceCode}`, { params });
714
+ async getEnterpriseGroups(params) {
715
+ const { data } = await this.client.get('/subscriptions/enterprise', {
716
+ params: {
717
+ 'arc-site': params.site,
718
+ },
719
+ });
588
720
  return data;
589
721
  }
590
- async getAllProducts(params) {
591
- const { data } = await this.client.get('/product', { params });
722
+ async createEnterpriseGroup(params, payload) {
723
+ const { data } = await this.client.post('/subscriptions/enterprise', payload, {
724
+ params: {
725
+ 'arc-site': params.site,
726
+ },
727
+ });
592
728
  return data;
593
729
  }
594
- // ============================================
595
- // Pricing Strategy Methods
596
- // ============================================
597
- async getPricingStrategyById(id, params) {
598
- const { data } = await this.client.get(`/pricing/strategy/${id}`, { params });
730
+ async createNonce(website, enterpriseGroupId) {
731
+ const { data } = await this.client.get(`/subscriptions/enterprise/${enterpriseGroupId}`, {
732
+ params: { 'arc-site': website },
733
+ });
599
734
  return data;
600
735
  }
601
- async getAllPricingStrategies(params) {
602
- const { data } = await this.client.get('/pricing/strategy', { params });
603
- return data;
736
+ }
737
+
738
+ class ArcSigningService extends ArcAbstractAPI {
739
+ constructor(options) {
740
+ super({ ...options, apiPath: 'signing-service' });
604
741
  }
605
- // ============================================
606
- // Pricing Rate Methods
607
- // ============================================
608
- async getPricingRateById(id, params) {
609
- const { data } = await this.client.get(`/pricing/rate/${id}`, { params });
742
+ async sign(service, serviceVersion, imageId) {
743
+ const { data } = await this.client.get(`/v2/sign/${service}/${serviceVersion}?value=${encodeURI(imageId)}`);
610
744
  return data;
611
745
  }
612
- async getAllPricingRates(params) {
613
- const { data } = await this.client.get('/pricing/rate', { params });
614
- return data;
746
+ }
747
+
748
+ class ArcSite extends ArcAbstractAPI {
749
+ constructor(options) {
750
+ super({ ...options, apiPath: 'site/v3' });
615
751
  }
616
- // ============================================
617
- // Pricing Cycle Methods
618
- // ============================================
619
- async getPricingCycle(priceCode, cycleIndex, startDate, params) {
620
- const { data } = await this.client.get(`/pricing/cycle/${priceCode}/${cycleIndex}/${startDate}`, {
621
- params,
622
- });
623
- return data;
624
- }
625
- // ============================================
626
- // Campaign Methods
627
- // ============================================
628
- async getCampaignById(id, params) {
629
- const { data } = await this.client.get(`/campaign/${id}`, { params });
630
- return data;
631
- }
632
- async getCampaignByName(campaignName, params) {
633
- const { data } = await this.client.get(`/campaign/${campaignName}/get`, { params });
634
- return data;
635
- }
636
- async getAllCampaigns(params) {
637
- const { data } = await this.client.get('/campaign', { params });
638
- return data;
639
- }
640
- // ============================================
641
- // Campaign Category Methods
642
- // ============================================
643
- async getCampaignCategoryById(id, params) {
644
- const { data } = await this.client.get(`/campaign/category/${id}`, { params });
645
- return data;
646
- }
647
- async getAllCampaignCategories(params) {
648
- const { data } = await this.client.get('/campaign/category', { params });
649
- return data;
650
- }
651
- // ============================================
652
- // Offer Methods
653
- // ============================================
654
- async getOfferById(id, params) {
655
- const { data } = await this.client.get(`/offer/${id}`, { params });
656
- return data;
657
- }
658
- async getAllOffers(params) {
659
- const { data } = await this.client.get('/offer', { params });
660
- return data;
661
- }
662
- // ============================================
663
- // Offer Attribute Methods
664
- // ============================================
665
- async getOfferAttributeById(id, params) {
666
- const { data } = await this.client.get(`/offer/attribute/${id}`, { params });
667
- return data;
668
- }
669
- async getAllOfferAttributes(params) {
670
- const { data } = await this.client.get('/offer/attribute', { params });
671
- return data;
672
- }
673
- // ============================================
674
- // Product Attribute Methods
675
- // ============================================
676
- async getProductAttributeById(id, params) {
677
- const { data } = await this.client.get(`/product/attribute/${id}`, { params });
678
- return data;
679
- }
680
- async getAllProductAttributes(params) {
681
- const { data } = await this.client.get('/product/attribute', { params });
682
- return data;
683
- }
684
- // ============================================
685
- // Condition Category Methods
686
- // ============================================
687
- async getAllConditionCategories(params) {
688
- const { data } = await this.client.get('/condition/categories', { params });
689
- return data;
690
- }
691
- }
692
-
693
- class ArcSales extends ArcAbstractAPI {
694
- constructor(options) {
695
- super({ ...options, apiPath: 'sales/api/v1' });
696
- }
697
- async migrate(params, payload) {
698
- const FormData = await platform.form_data();
699
- const form = new FormData();
700
- form.append('file', JSON.stringify(payload), { filename: 'subs.json', contentType: 'application/json' });
701
- const { data } = await this.client.post('/migrate', form, {
702
- params,
703
- headers: {
704
- ...form.getHeaders(),
705
- },
706
- });
707
- return data;
708
- }
709
- }
710
- class ArcSalesV2 extends ArcAbstractAPI {
711
- constructor(options) {
712
- super({ ...options, apiPath: 'sales/api/v2' });
713
- }
714
- async getEnterpriseGroups(params) {
715
- const { data } = await this.client.get('/subscriptions/enterprise', {
716
- params: {
717
- 'arc-site': params.site,
718
- },
719
- });
720
- return data;
721
- }
722
- async createEnterpriseGroup(params, payload) {
723
- const { data } = await this.client.post('/subscriptions/enterprise', payload, {
724
- params: {
725
- 'arc-site': params.site,
726
- },
727
- });
728
- return data;
729
- }
730
- async createNonce(website, enterpriseGroupId) {
731
- const { data } = await this.client.get(`/subscriptions/enterprise/${enterpriseGroupId}`, {
732
- params: { 'arc-site': website },
733
- });
734
- return data;
735
- }
736
- }
737
-
738
- class ArcSigningService extends ArcAbstractAPI {
739
- constructor(options) {
740
- super({ ...options, apiPath: 'signing-service' });
741
- }
742
- async sign(service, serviceVersion, imageId) {
743
- const { data } = await this.client.get(`/v2/sign/${service}/${serviceVersion}?value=${encodeURI(imageId)}`);
744
- return data;
745
- }
746
- }
747
-
748
- class ArcSite extends ArcAbstractAPI {
749
- constructor(options) {
750
- super({ ...options, apiPath: 'site/v3' });
751
- }
752
- async getSections(params) {
753
- const { data } = await this.client.get(`/website/${params.website}/section`, {
754
- params: { _website: params.website, ...params },
752
+ async getSections(params) {
753
+ const { data } = await this.client.get(`/website/${params.website}/section`, {
754
+ params: { _website: params.website, ...params },
755
755
  });
756
756
  return data;
757
757
  }
@@ -889,84 +889,6 @@ const ArcAPI = (options) => {
889
889
  return API;
890
890
  };
891
891
 
892
- /* eslint-disable */
893
- /**
894
- * This file was automatically generated by json-schema-to-typescript.
895
- * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
896
- * and run json-schema-to-typescript to regenerate this file.
897
- */
898
-
899
- var ansTypes = /*#__PURE__*/Object.freeze({
900
- __proto__: null
901
- });
902
-
903
- var utils = /*#__PURE__*/Object.freeze({
904
- __proto__: null
905
- });
906
-
907
- var ANSType;
908
- (function (ANSType) {
909
- ANSType["Story"] = "story";
910
- ANSType["Video"] = "video";
911
- ANSType["Tag"] = "tag";
912
- ANSType["Author"] = "author";
913
- ANSType["Gallery"] = "gallery";
914
- ANSType["Image"] = "image";
915
- ANSType["Redirect"] = "redirect";
916
- })(ANSType || (ANSType = {}));
917
- var MigrationStatus;
918
- (function (MigrationStatus) {
919
- MigrationStatus["Success"] = "Success";
920
- MigrationStatus["Queued"] = "Queued";
921
- MigrationStatus["Circulated"] = "Circulated";
922
- MigrationStatus["Published"] = "Published";
923
- MigrationStatus["Scheduled"] = "Scheduled";
924
- MigrationStatus["FailVideo"] = "FailVideo";
925
- MigrationStatus["FailImage"] = "FailImage";
926
- MigrationStatus["FailPhoto"] = "FailPhoto";
927
- MigrationStatus["FailStory"] = "FailStory";
928
- MigrationStatus["FailGallery"] = "FailGallery";
929
- MigrationStatus["FailAuthor"] = "FailAuthor";
930
- MigrationStatus["FailTag"] = "FailTag";
931
- MigrationStatus["ValidationFailed"] = "ValidationFailed";
932
- })(MigrationStatus || (MigrationStatus = {}));
933
- var SummarySortBy;
934
- (function (SummarySortBy) {
935
- SummarySortBy["CreateDate"] = "createDate";
936
- SummarySortBy["UpdateDate"] = "updateDate";
937
- SummarySortBy["Id"] = "id";
938
- })(SummarySortBy || (SummarySortBy = {}));
939
- var SummarySortOrder;
940
- (function (SummarySortOrder) {
941
- SummarySortOrder["ASC"] = "ASC";
942
- SummarySortOrder["DESC"] = "DESC";
943
- })(SummarySortOrder || (SummarySortOrder = {}));
944
-
945
- var index$3 = /*#__PURE__*/Object.freeze({
946
- __proto__: null,
947
- ANS: ansTypes,
948
- get ANSType () { return ANSType; },
949
- get MigrationStatus () { return MigrationStatus; },
950
- get SummarySortBy () { return SummarySortBy; },
951
- get SummarySortOrder () { return SummarySortOrder; },
952
- TypeUtils: utils
953
- });
954
-
955
- const reference = (ref) => {
956
- return {
957
- _id: ref.id,
958
- type: 'reference',
959
- referent: {
960
- ...ref,
961
- },
962
- };
963
- };
964
-
965
- var ANS = /*#__PURE__*/Object.freeze({
966
- __proto__: null,
967
- reference: reference
968
- });
969
-
970
892
  const ContentElement = {
971
893
  divider: () => {
972
894
  return {
@@ -1199,18 +1121,60 @@ const ContentElement = {
1199
1121
  },
1200
1122
  };
1201
1123
 
1124
+ const BLOCK_ELEMENT_TAGS = [
1125
+ 'ADDRESS',
1126
+ 'ARTICLE',
1127
+ 'ASIDE',
1128
+ 'BLOCKQUOTE',
1129
+ 'DETAILS',
1130
+ 'DIV',
1131
+ 'DL',
1132
+ 'FIELDSET',
1133
+ 'FIGCAPTION',
1134
+ 'FIGURE',
1135
+ 'FOOTER',
1136
+ 'FORM',
1137
+ 'H1',
1138
+ 'H2',
1139
+ 'H3',
1140
+ 'H4',
1141
+ 'H5',
1142
+ 'H6',
1143
+ 'HEADER',
1144
+ 'HR',
1145
+ 'LINE',
1146
+ 'MAIN',
1147
+ 'MENU',
1148
+ 'NAV',
1149
+ 'OL',
1150
+ 'P',
1151
+ 'PARAGRAPH',
1152
+ 'PRE',
1153
+ 'SECTION',
1154
+ 'TABLE',
1155
+ 'UL',
1156
+ 'LI',
1157
+ 'BODY',
1158
+ 'HTML',
1159
+ ];
1160
+
1161
+ var html_constants = /*#__PURE__*/Object.freeze({
1162
+ __proto__: null,
1163
+ BLOCK_ELEMENT_TAGS: BLOCK_ELEMENT_TAGS
1164
+ });
1165
+
1202
1166
  const socialRegExps = {
1203
- instagram: /(?:https?:\/\/)?(?:www.)?instagram.com\/?([a-zA-Z0-9\.\_\-]+)?\/([p]+)?([reel]+)?([tv]+)?([stories]+)?\/([a-zA-Z0-9\-\_\.]+)\/?([0-9]+)?/,
1204
- twitter: /https:\/\/(?:www\.)?twitter\.com\/[^\/]+\/status(?:es)?\/(\d+)/,
1205
- tiktok: /https:\/\/(?:m|www|vm)?\.?tiktok\.com\/((?:.*\b(?:(?:usr|v|embed|user|video)\/|\?shareId=|\&item_id=)(\d+))|\w+)/,
1206
- facebookPost: /https:\/\/www\.facebook\.com\/(photo(\.php|s)|permalink\.php|media|questions|notes|[^\/]+\/(activity|posts))[\/?].*$/,
1207
- facebookVideo: /https:\/\/www\.facebook\.com\/([^\/?].+\/)?video(s|\.php)[\/?].*/,
1167
+ instagram: /(?:https?:\/\/)?(?:www.)?instagram.com\/?([a-zA-Z0-9._-]+)?\/([p]+)?([reel]+)?([tv]+)?([stories]+)?\/([a-zA-Z0-9\-_.]+)\/?([0-9]+)?/,
1168
+ twitter: /https:\/\/(?:www\.)?twitter\.com\/[^/]+\/status(?:es)?\/(\d+)/,
1169
+ tiktok: /https:\/\/(?:m|www|vm)?\.?tiktok\.com\/((?:.*\b(?:(?:usr|v|embed|user|video)\/|\?shareId=|&item_id=)(\d+))|\w+)/,
1170
+ facebookPost: /https:\/\/www\.facebook\.com\/(photo(\.php|s)|permalink\.php|media|questions|notes|[^/]+\/(activity|posts))[/?].*$/,
1171
+ facebookVideo: /https:\/\/www\.facebook\.com\/([^/?].+\/)?video(s|\.php)[/?].*/,
1208
1172
  };
1209
1173
  function match(url, regex) {
1210
1174
  return url.match(regex)?.[0];
1211
1175
  }
1212
1176
  function youtubeURLParser(url = '') {
1213
- const regExp = /(?:youtube(?:-nocookie)?\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]vi?=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/;
1177
+ const regExp = /(?:youtube(?:-nocookie)?\.com\/(?:[^/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]vi?=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/;
1214
1178
  const id = url?.match(regExp)?.[1];
1215
1179
  if (id) {
1216
1180
  return `https://youtu.be/${id}`;
@@ -1265,257 +1229,459 @@ function createSocial(url = '') {
1265
1229
  return embeds;
1266
1230
  }
1267
1231
  const randomId = () => `${new Date().toISOString()}-${Math.random()}`;
1232
+ const isTextCE = (ce) => {
1233
+ return ce?.type === 'text';
1234
+ };
1235
+ const decodeHTMLEntities = (str) => decode(str);
1268
1236
 
1269
1237
  var ContentElements = /*#__PURE__*/Object.freeze({
1270
1238
  __proto__: null,
1271
1239
  createSocial: createSocial,
1240
+ decodeHTMLEntities: decodeHTMLEntities,
1272
1241
  facebookPostURLParser: facebookPostURLParser,
1273
1242
  facebookVideoURLParser: facebookVideoURLParser,
1274
1243
  instagramURLParser: instagramURLParser,
1244
+ isTextCE: isTextCE,
1275
1245
  randomId: randomId,
1276
1246
  tiktokURLParser: tiktokURLParser,
1277
1247
  twitterURLParser: twitterURLParser,
1278
1248
  youtubeURLParser: youtubeURLParser
1279
1249
  });
1280
1250
 
1281
- const generateArcId = (identifier, orgHostname) => {
1282
- const namespace = v5(orgHostname, v5.DNS);
1283
- const buffer = v5(identifier, namespace, Buffer.alloc(16));
1284
- return encode(buffer, 'RFC4648', { padding: false });
1251
+ const isTextNode = (node) => {
1252
+ return node instanceof TextNode;
1285
1253
  };
1286
- /**
1287
- * Utility class for generating Arc IDs and source IDs
1288
- *
1289
- * @example
1290
- * ```ts
1291
- * const generator = new IdGenerator(['my-org']);
1292
- * const arcId = generator.getArcId('123'); // Generates a unique for 'my-org' Arc ID
1293
- * const sourceId = generator.getSourceId('123', ['my-site']); // Generates 'my-site-123'
1294
- * ```
1295
- */
1296
- class IdGenerator {
1297
- constructor(namespaces) {
1298
- if (!namespaces.length) {
1299
- throw new Error('At least 1 namespace is required');
1300
- }
1301
- this.namespace = namespaces.join('-');
1302
- }
1303
- getArcId(id) {
1304
- return generateArcId(id.toString(), this.namespace);
1305
- }
1306
- getSourceId(id, prefixes = []) {
1307
- return [...prefixes, id].join('-');
1308
- }
1309
- }
1310
-
1311
- var Id = /*#__PURE__*/Object.freeze({
1312
- __proto__: null,
1313
- IdGenerator: IdGenerator,
1314
- generateArcId: generateArcId
1315
- });
1316
-
1317
- const buildTree = (items) => {
1318
- const tree = [
1319
- {
1320
- id: '/',
1321
- children: [],
1322
- meta: new Proxy({}, {
1323
- get: () => {
1324
- throw new Error('Root node meta is not accessible');
1325
- },
1326
- }),
1327
- parent: null,
1328
- },
1329
- ];
1330
- // Track nodes at each level to maintain parent-child relationships
1331
- // stores last node at each level
1332
- const currLevelNodes = {
1333
- 0: tree[0],
1334
- };
1335
- for (const item of items) {
1336
- const node = {
1337
- id: item.id,
1338
- parent: null,
1339
- children: [],
1340
- meta: item,
1341
- };
1342
- // Determine the level of this node
1343
- const levelKey = Object.keys(item).find((key) => key.startsWith('N') && item[key]);
1344
- const level = Number(levelKey?.replace('N', '')) || 0;
1345
- if (!level) {
1346
- throw new Error(`Invalid level for section ${item.id}`);
1347
- }
1348
- // This is a child node - attach to its parent
1349
- const parentLevel = level - 1;
1350
- const parentNode = currLevelNodes[parentLevel];
1351
- if (parentNode) {
1352
- node.parent = parentNode;
1353
- parentNode.children.push(node);
1354
- }
1355
- else {
1356
- throw new Error(`Parent node not found for section ${item.id}`);
1357
- }
1358
- // Set this as the current node for its level
1359
- currLevelNodes[level] = node;
1360
- }
1361
- // return root nodes children
1362
- return tree[0].children;
1254
+ const isHTMLElement = (node) => {
1255
+ return node instanceof HTMLElement;
1363
1256
  };
1364
- const flattenTree = (tree) => {
1365
- const flatten = [];
1366
- const traverse = (node) => {
1367
- flatten.push(node);
1368
- for (const child of node.children) {
1369
- traverse(child);
1370
- }
1371
- };
1372
- // traverse all root nodes and their children
1373
- for (const node of tree) {
1374
- traverse(node);
1375
- }
1376
- return flatten;
1257
+ const isCommentNode = (node) => {
1258
+ return node instanceof CommentNode;
1377
1259
  };
1378
- const buildAndFlattenTree = (items) => flattenTree(buildTree(items));
1379
- const groupByWebsites = (sections) => {
1380
- return sections.reduce((acc, section) => {
1381
- const website = section._website;
1382
- if (!acc[website])
1383
- acc[website] = [];
1384
- acc[website].push(section);
1385
- return acc;
1386
- }, {});
1260
+ const nodeTagIs = (node, name) => {
1261
+ return isHTMLElement(node) && node.tagName?.toLowerCase() === name.toLowerCase();
1387
1262
  };
1388
- const references = (sections) => {
1389
- return sections.map((s) => reference({
1390
- id: s._id,
1391
- website: s._website,
1392
- type: 'section',
1393
- }));
1263
+ const nodeTagIn = (node, names) => {
1264
+ return isHTMLElement(node) && names.includes(node.tagName?.toLowerCase());
1394
1265
  };
1395
- const isReference = (section) => {
1396
- return section?.type === 'reference' && section?.referent?.type === 'section';
1266
+ const htmlToText = (html, parseOptions) => {
1267
+ if (!html)
1268
+ return '';
1269
+ const doc = parse(html, parseOptions);
1270
+ return decodeHTMLEntities(doc.innerText);
1397
1271
  };
1398
- const removeDuplicates = (sections) => {
1399
- const map = new Map();
1400
- sections.forEach((s) => {
1401
- if (isReference(s)) {
1402
- map.set(`${s.referent.id}${s.referent.website}`, s);
1403
- }
1404
- else {
1405
- map.set(`${s._id}${s._website}`, s);
1406
- }
1407
- });
1408
- return [...map.values()];
1272
+ const getHTMLElementAttribute = (e, key) => {
1273
+ const value = e.getAttribute(key);
1274
+ if (value)
1275
+ return value;
1276
+ return new URLSearchParams(e.rawAttrs.replaceAll(' ', '&')).get(key);
1409
1277
  };
1410
- class SectionsRepository {
1411
- constructor(arc) {
1412
- this.arc = arc;
1413
- this.sectionsByWebsite = {};
1414
- this.websitesAreLoaded = false;
1415
- }
1416
- async put(ans) {
1417
- await this.arc.Site.putSection(ans);
1418
- const created = await this.arc.Site.getSection(ans._id, ans.website);
1419
- this.save(created);
1420
- }
1421
- async loadWebsite(website) {
1422
- const sections = [];
1423
- let next = true;
1424
- let offset = 0;
1425
- while (next) {
1426
- const migrated = await this.arc.Site.getSections({ website, offset }).catch((_) => {
1427
- return { q_results: [] };
1428
- });
1429
- if (migrated.q_results.length) {
1430
- sections.push(...migrated.q_results);
1431
- offset += migrated.q_results.length;
1432
- }
1433
- else {
1434
- next = false;
1435
- }
1436
- }
1437
- return sections;
1438
- }
1439
- async loadWebsites(websites) {
1440
- for (const website of websites) {
1441
- this.sectionsByWebsite[website] = await this.loadWebsite(website);
1442
- }
1443
- this.websitesAreLoaded = true;
1444
- }
1445
- save(section) {
1446
- const website = section._website;
1447
- assert.ok(website, 'Section must have a website');
1448
- this.sectionsByWebsite[website] = this.sectionsByWebsite[website] || [];
1449
- if (!this.sectionsByWebsite[website].find((s) => s._id === section._id)) {
1450
- this.sectionsByWebsite[website].push(section);
1451
- }
1452
- }
1453
- getById(id, website) {
1454
- this.ensureWebsitesLoaded();
1455
- const section = this.sectionsByWebsite[website]?.find((s) => s._id === id);
1456
- return section;
1457
- }
1458
- getByWebsite(website) {
1459
- this.ensureWebsitesLoaded();
1460
- return this.sectionsByWebsite[website];
1461
- }
1462
- getParentSections(section) {
1463
- this.ensureWebsitesLoaded();
1464
- const parents = [];
1465
- let current = section;
1466
- while (current.parent?.default && current.parent.default !== '/') {
1467
- const parent = this.getById(current.parent.default, section._website);
1468
- if (!parent)
1469
- break;
1470
- parents.push(parent);
1471
- current = parent;
1472
- }
1473
- return parents;
1474
- }
1475
- ensureWebsitesLoaded() {
1476
- assert.ok(this.websitesAreLoaded, 'call .loadWebsites() first');
1477
- }
1478
- }
1479
1278
 
1480
- var Section = /*#__PURE__*/Object.freeze({
1279
+ var html_utils = /*#__PURE__*/Object.freeze({
1481
1280
  __proto__: null,
1482
- SectionsRepository: SectionsRepository,
1483
- buildAndFlattenTree: buildAndFlattenTree,
1484
- buildTree: buildTree,
1485
- flattenTree: flattenTree,
1486
- groupByWebsites: groupByWebsites,
1487
- isReference: isReference,
1488
- references: references,
1489
- removeDuplicates: removeDuplicates
1281
+ getHTMLElementAttribute: getHTMLElementAttribute,
1282
+ htmlToText: htmlToText,
1283
+ isCommentNode: isCommentNode,
1284
+ isHTMLElement: isHTMLElement,
1285
+ isTextNode: isTextNode,
1286
+ nodeTagIn: nodeTagIn,
1287
+ nodeTagIs: nodeTagIs
1490
1288
  });
1491
1289
 
1492
- const ArcUtils = {
1493
- Id,
1494
- ANS,
1495
- ContentElements,
1496
- Section,
1497
- };
1498
-
1499
1290
  /**
1500
- * Base class for all arc entities, it provides common methods and properties
1501
- * If you want to create a new entity subtype you should extend this class
1291
+ * HTMLProcessor is responsible for parsing HTML content into structured content elements.
1292
+ * It provides a flexible way to handle different HTML nodes and wrap text content.
1502
1293
  *
1503
- * Use case: You want to migrate stories from BBC
1504
- * You define `class BBCStory extends ArcDocument<ANS.AStory>` and implement all abstract methods
1505
- * Then you can override the specific methods to enrich the story with the data from BBC
1294
+ * The processor can be extended with custom handlers for specific node types and
1295
+ * wrappers for text content.
1506
1296
  *
1507
- * To migrate it call .migrate() method
1508
- */
1509
- class Document {
1510
- constructor() {
1511
- this.ans = null;
1512
- this.circulations = [];
1513
- }
1514
- async init() {
1515
- // fetch necessary data and validate it here
1516
- }
1517
- async prepare() {
1518
- await this.init();
1297
+ * @example
1298
+ * ```ts
1299
+ * // Create and initialize processor
1300
+ * const processor = new HTMLProcessor();
1301
+ * processor.init();
1302
+ *
1303
+ * // Parse HTML content
1304
+ * const html = '<div><p>Some text</p><img src="image.jpg"></div>';
1305
+ * const elements = await processor.parse(html);
1306
+ * ```
1307
+ *
1308
+ * The processor comes with built-in handlers for common HTML elements like links,
1309
+ * text formatting (i, u, strong), and block elements. Custom handlers can be added
1310
+ * using the `handle()` and `wrap()` methods.
1311
+ */
1312
/**
 * Converts HTML markup into a list of content elements.
 *
 * Two kinds of pluggable callbacks drive the conversion:
 * - node handlers (`handle()`): tried in registration order for every node; the first
 *   one that returns a truthy value (an array of content elements) wins;
 * - text wrappers (`wrap()`): applied to the text elements produced by a node's
 *   children; the first truthy wrapper result replaces the text element.
 *
 * Call `init()` to register the built-in handlers and wrappers.
 */
class HTMLProcessor {
    constructor() {
        // When true, the children of a node are processed concurrently; otherwise one by one.
        this.parallelProcessing = true;
        this.handlers = {
            node: new Map(),
            wrap: new Map(),
        };
    }
    /**
     * Registers the built-in text wrappers and node handlers.
     */
    init() {
        // Checked in this order so that, when several alignments are declared,
        // `right` wins over `left`, which wins over `center`.
        const alignmentPatterns = ['right', 'left', 'center'].map((alignment) => [
            alignment,
            // Tolerates any whitespace around the colon, a missing trailing semicolon and any letter case.
            new RegExp(`text-align\\s*:\\s*${alignment}\\b`, 'i'),
        ]);
        // wrappers are used to wrap the content of nested text nodes
        // in a specific way
        this.wrap('link', (node, text) => {
            if (nodeTagIn(node, ['a'])) {
                const attributes = ['href', 'target', 'rel']
                    .map((attr) => [attr, getHTMLElementAttribute(node, attr)])
                    .filter(([_, value]) => value)
                    // Values are emitted inside double quotes, so embedded quotes are
                    // escaped to keep the generated markup well-formed.
                    .map(([key, value]) => `${key}="${String(value).replaceAll('"', '&quot;')}"`)
                    .join(' ');
                return {
                    ...text,
                    content: `<a ${attributes}>${text.content}</a>`,
                };
            }
        });
        this.wrap('i', (node, text) => {
            if (nodeTagIn(node, ['i'])) {
                return {
                    ...text,
                    content: `<i>${text.content}</i>`,
                };
            }
        });
        this.wrap('u', (node, text) => {
            if (nodeTagIn(node, ['u'])) {
                return {
                    ...text,
                    content: `<u>${text.content}</u>`,
                };
            }
        });
        this.wrap('sup/sub', (node, text) => {
            if (nodeTagIn(node, ['sup', 'sub'])) {
                return {
                    ...text,
                    content: `<mark class="${node.tagName.toLowerCase()}">${text.content}</mark>`,
                };
            }
        });
        this.wrap('strong', (node, text) => {
            if (nodeTagIn(node, ['strong', 'b'])) {
                return {
                    ...text,
                    content: `<b>${text.content}</b>`,
                };
            }
        });
        this.wrap('center', (node, text) => {
            if (nodeTagIn(node, ['center'])) {
                return {
                    ...text,
                    alignment: 'center',
                };
            }
        });
        this.wrap('aligned-paragraph', (node, text) => {
            if (nodeTagIn(node, ['p'])) {
                const styleAttribute = getHTMLElementAttribute(node, 'style') || '';
                if (!styleAttribute)
                    return text;
                const match = alignmentPatterns.find(([, pattern]) => pattern.test(styleAttribute));
                // A paragraph without a recognised alignment still counts as handled:
                // the text is returned unchanged.
                return match ? { ...text, alignment: match[0] } : text;
            }
        });
        // handlers are used to handle specific nodes
        // and return a list of content elements
        this.handle('default', (node) => {
            const noTag = isHTMLElement(node) && !node.tagName;
            if (noTag ||
                nodeTagIn(node, [
                    'p',
                    'a',
                    'b',
                    'sup',
                    'sub',
                    'span',
                    'strong',
                    'em',
                    'i',
                    'u',
                    'section',
                    'main',
                    'div',
                    'li',
                    'center',
                ])) {
                return this.handleNested(node);
            }
        });
        this.handle('headers', (node) => {
            if (nodeTagIn(node, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
                return this.createHeader(node);
            }
        });
        this.handle('text', (node) => {
            if (isTextNode(node)) {
                return this.createText(node);
            }
        });
        this.handle('comment', (node) => {
            if (isCommentNode(node)) {
                return this.handleComment(node);
            }
        });
        this.handle('list', async (node) => {
            if (nodeTagIn(node, ['ul', 'ol'])) {
                // Use the same case-insensitive helper as the guard above.
                const listType = nodeTagIs(node, 'ul') ? 'unordered' : 'ordered';
                return this.createList(node, listType);
            }
        });
        this.handle('table', (node) => {
            if (nodeTagIs(node, 'table')) {
                return this.handleTable(node);
            }
        });
        this.handle('iframe', (node) => {
            if (nodeTagIs(node, 'iframe')) {
                return this.handleIframe(node);
            }
        });
        this.handle('img', (node) => {
            if (nodeTagIs(node, 'img')) {
                return this.handleImage(node);
            }
        });
        this.handle('br', (node) => {
            if (nodeTagIs(node, 'br')) {
                return this.handleBreak(node);
            }
        });
    }
    /**
     * Registers a node handler under `name`; an existing handler with that name is
     * replaced after a warning.
     *
     * @param name - Key of the handler
     * @param handler - Receives a node; returns content elements, or a falsy value to pass
     */
    handle(name, handler) {
        if (this.handlers.node.has(name)) {
            this.warn({ name }, `${name} node handler already set`);
        }
        this.handlers.node.set(name, handler);
    }
    /**
     * Registers a text wrapper under `name`; an existing wrapper with that name is
     * replaced after a warning.
     *
     * @param name - Key of the wrapper
     * @param handler - Receives the parent node and a text element; returns the wrapped
     *   element, or a falsy value to pass
     */
    wrap(name, handler) {
        if (this.handlers.wrap.has(name)) {
            this.warn({ name }, `${name} wrap handler already set`);
        }
        this.handlers.wrap.set(name, handler);
    }
    /**
     * Parses an HTML string (comments preserved, whitespace removed) and converts it.
     *
     * @param html - Markup to convert
     * @returns Promise resolving to the content elements without dividers; an empty
     *   array when the document could not be processed
     */
    async parse(html) {
        const doc = parse(html, { comment: true });
        doc.removeWhitespace();
        const elements = await this.process(doc);
        const filtered = elements?.filter((e) => e.type !== 'divider');
        return filtered || [];
    }
    /**
     * Flags a text element as block-level when it already was, or when `parent` is a block element.
     * The element is modified in place and returned.
     */
    addTextAdditionalProperties(c, parent) {
        const additionalProperties = c.additional_properties || {};
        const parentNodeIsBlockElement = this.isBlockElement(parent);
        c.additional_properties = {
            ...c.additional_properties,
            isBlockElement: additionalProperties.isBlockElement || parentNodeIsBlockElement,
        };
        return c;
    }
    /**
     * Applies the registered wrappers to every text element in `elements`;
     * non-text elements are passed through untouched.
     *
     * @param node - The HTML node that contains the elements
     * @param elements - Content elements produced from the node's children
     * @returns New array with text elements replaced by their wrapped version
     **/
    wrapChildrenTextNodes(node, elements) {
        const wrapped = [];
        const wrappers = [...this.handlers.wrap.values()];
        for (const c of elements) {
            if (!isTextCE(c)) {
                wrapped.push(c);
                continue;
            }
            this.addTextAdditionalProperties(c, node);
            // Every wrapper is invoked; the first truthy result is the one used.
            const handled = wrappers.map((wrapper) => wrapper(node, c)).find(Boolean);
            wrapped.push(handled || c);
        }
        return wrapped;
    }
    /**
     * Processes the children of `node`, merges adjacent inline text elements and
     * applies the text wrappers for `node`.
     *
     * @param node - The HTML node to process
     * @returns Promise resolving to the resulting content elements
     **/
    async handleNested(node) {
        const children = await this.processChildNodes(node);
        // Unhandled children resolve to undefined and are dropped here.
        const filtered = children.filter(Boolean).flat();
        const merged = this.mergeParagraphs(filtered);
        return this.wrapChildrenTextNodes(node, merged);
    }
    /**
     * Runs `process()` on each child of `node`, concurrently or sequentially
     * depending on `parallelProcessing`; results keep the child order.
     */
    async processChildNodes(node) {
        if (this.parallelProcessing) {
            return await Promise.all(node.childNodes.map((child) => this.process(child)));
        }
        const children = [];
        for (const child of node.childNodes) {
            children.push(await this.process(child));
        }
        return children;
    }
    /**
     * Converts a single node by trying the node handlers in registration order.
     * A handler that throws is reported through `warn()` and the next one is tried.
     *
     * @param node - The HTML node to process
     * @returns Promise resolving to an array of content elements, or undefined if node cannot be processed
     */
    async process(node) {
        for (const [name, handler] of this.handlers.node.entries()) {
            try {
                const result = await handler(node);
                if (result) {
                    // A truthy (array) result means the node was handled, even when it is empty.
                    return [...result];
                }
            }
            catch (error) {
                // String() also copes with thrown null/undefined, where error.toString() would throw again.
                this.warn({ node: node.toString(), error: String(error), name }, 'HandlerError');
            }
        }
        this.warn({ node: node.toString() }, 'UnknownNodeError');
    }
    /**
     * Merges each run of adjacent inline text elements into one text element.
     * Block-level text elements and non-text elements end the current run and are kept as they are.
     * The merged element takes its properties from the last element of the run.
     *
     * @param items - The array of content elements to merge
     * @returns New array with the merged elements
     **/
    mergeParagraphs(items) {
        const merged = [];
        let toMerge = [];
        const merge = () => {
            if (!toMerge.length)
                return;
            const paragraph = toMerge.reduce((acc, p) => {
                return {
                    ...p,
                    content: acc.content + p.content,
                };
            }, { type: 'text', content: '' });
            merged.push(paragraph);
            toMerge = [];
        };
        for (const item of items) {
            const isBlockElement = item.additional_properties?.isBlockElement;
            if (isTextCE(item) && !isBlockElement) {
                toMerge.push(item);
            }
            else {
                merge();
                merged.push(item);
            }
        }
        merge();
        return merged;
    }
    /** Comments produce no content elements. */
    handleComment(_) {
        return [];
    }
    /** Tables are kept as a raw HTML element built from the node's markup. */
    async handleTable(node) {
        return [ContentElement.raw_html(node.toString())];
    }
    /** Iframes are kept as a raw HTML element built from the node's markup. */
    async handleIframe(node) {
        return [ContentElement.raw_html(node.toString())];
    }
    /** Images are kept as a raw HTML element built from the node's markup. */
    async handleImage(node) {
        return [ContentElement.raw_html(node.toString())];
    }
    /** Line breaks become dividers (which `parse()` removes from the top-level result). */
    async handleBreak(_) {
        return [ContentElement.divider()];
    }
    /** Builds a quote element from the processed children of `node`. */
    async createQuote(node) {
        const items = await this.handleNested(node);
        return [ContentElement.quote(items)];
    }
    /** Builds a text element from the text of `node`. */
    async createText(node) {
        const text = ContentElement.text(node.text);
        return [text];
    }
    /** Keeps only the elements allowed inside a list: text and nested lists. */
    filterListItems(items) {
        return items.filter((i) => ['text', 'list'].includes(i.type));
    }
    /** Builds a list element of the given `type` from the processed children of `node`. */
    async createList(node, type) {
        const items = await this.handleNested(node);
        return [ContentElement.list(type, this.filterListItems(items))];
    }
    /** Builds a header element; the level is read from the tag name and falls back to 3. */
    async createHeader(node) {
        const level = +node.tagName.split('H')[1] || 3;
        return [ContentElement.header(node.innerText, level)];
    }
    /** Tells whether `node` is an HTML element whose tag name is listed in BLOCK_ELEMENT_TAGS. */
    isBlockElement(node) {
        if (!isHTMLElement(node))
            return false;
        return BLOCK_ELEMENT_TAGS.includes(node.tagName);
    }
    /** Reports a problem; override to route warnings elsewhere. */
    warn(metadata, message) {
        console.warn(metadata, message);
    }
}
1651
+
1652
// Namespace bundling the HTML processing API: constants, the processor class and the helper utilities.
var index$3 = /*#__PURE__*/Object.freeze({
    __proto__: null,
    Constants: html_constants,
    HTMLProcessor,
    Utils: html_utils,
});

// Namespace exposing the content-element builders together with the HTML namespace.
var index$2 = /*#__PURE__*/Object.freeze({
    __proto__: null,
    ContentElement,
    HTML: index$3,
});
1664
+
1665
+ /**
1666
+ * Base class for all arc entities, it provides common methods and properties
1667
+ * If you want to create a new entity subtype you should extend this class
1668
+ *
1669
+ * Use case: You want to migrate stories from BBC
1670
+ * You define `class BBCStory extends ArcDocument<ANS.AStory>` and implement all abstract methods
1671
+ * Then you can override the specific methods to enrich the story with the data from BBC
1672
+ *
1673
+ * To migrate it call .migrate() method
1674
+ */
1675
+ class Document {
1676
+ constructor() {
1677
+ this.ans = null;
1678
+ this.circulations = [];
1679
+ }
1680
+ async init() {
1681
+ // fetch necessary data and validate it here
1682
+ }
1683
+ async prepare() {
1684
+ await this.init();
1519
1685
  const payload = await this.payload();
1520
1686
  const params = await this.params();
1521
1687
  return { payload, params };
@@ -1744,473 +1910,307 @@ class Story extends Document {
1744
1910
  }
1745
1911
  }
1746
1912
 
1747
- var index$2 = /*#__PURE__*/Object.freeze({
1913
// Namespace grouping the document entity classes.
var index$1 = /*#__PURE__*/Object.freeze({
    __proto__: null,
    Document,
    Story,
});
1752
1918
 
1753
- const BLOCK_ELEMENT_TAGS = [
1754
- 'ADDRESS',
1755
- 'ARTICLE',
1756
- 'ASIDE',
1757
- 'BLOCKQUOTE',
1758
- 'DETAILS',
1759
- 'DIV',
1760
- 'DL',
1761
- 'FIELDSET',
1762
- 'FIGCAPTION',
1763
- 'FIGURE',
1764
- 'FOOTER',
1765
- 'FORM',
1766
- 'H1',
1767
- 'H2',
1768
- 'H3',
1769
- 'H4',
1770
- 'H5',
1771
- 'H6',
1772
- 'HEADER',
1773
- 'HR',
1774
- 'LINE',
1775
- 'MAIN',
1776
- 'MENU',
1777
- 'NAV',
1778
- 'OL',
1779
- 'P',
1780
- 'PARAGRAPH',
1781
- 'PRE',
1782
- 'SECTION',
1783
- 'TABLE',
1784
- 'UL',
1785
- 'LI',
1786
- 'BODY',
1787
- 'HTML',
1788
- ];
1919
// Enum-like map from ANS type names to their string values.
var ANSType;
ANSType = ANSType || {};
Object.assign(ANSType, {
    Story: 'story',
    Video: 'video',
    Tag: 'tag',
    Author: 'author',
    Gallery: 'gallery',
    Image: 'image',
    Redirect: 'redirect',
});
1929
// Enum-like map of migration status names; every value equals its key.
var MigrationStatus;
MigrationStatus = MigrationStatus || {};
Object.assign(MigrationStatus, {
    Success: 'Success',
    Queued: 'Queued',
    Circulated: 'Circulated',
    Published: 'Published',
    Scheduled: 'Scheduled',
    FailVideo: 'FailVideo',
    FailImage: 'FailImage',
    FailPhoto: 'FailPhoto',
    FailStory: 'FailStory',
    FailGallery: 'FailGallery',
    FailAuthor: 'FailAuthor',
    FailTag: 'FailTag',
    ValidationFailed: 'ValidationFailed',
});
1945
// Enum-like map of the fields a summary can be sorted by.
var SummarySortBy;
SummarySortBy = SummarySortBy || {};
Object.assign(SummarySortBy, {
    CreateDate: 'createDate',
    UpdateDate: 'updateDate',
    Id: 'id',
});
1951
// Enum-like map of the summary sort directions.
var SummarySortOrder;
SummarySortOrder = SummarySortOrder || {};
Object.assign(SummarySortOrder, {
    ASC: 'ASC',
    DESC: 'DESC',
});
1789
1956
 
1790
- var html_constants = /*#__PURE__*/Object.freeze({
1957
+ /* eslint-disable */
1958
+ /**
1959
+ * This file was automatically generated by json-schema-to-typescript.
1960
+ * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,
1961
+ * and run json-schema-to-typescript to regenerate this file.
1962
+ */
1963
+
1964
// Runtime placeholder for the generated ANS type definitions: an empty, frozen, prototype-less object.
var ansTypes = /*#__PURE__*/Object.freeze(/*#__PURE__*/Object.create(null));

// Runtime placeholder for the type utilities: an empty, frozen, prototype-less object.
var utils = /*#__PURE__*/Object.freeze(/*#__PURE__*/Object.create(null));
1971
+
1972
// Public types namespace. The enum entries are exposed through getters, so every
// access reads the current value of the corresponding `var` binding.
var index = /*#__PURE__*/Object.freeze({
    __proto__: null,
    ANS: ansTypes,
    get ANSType() {
        return ANSType;
    },
    get MigrationStatus() {
        return MigrationStatus;
    },
    get SummarySortBy() {
        return SummarySortBy;
    },
    get SummarySortOrder() {
        return SummarySortOrder;
    },
    TypeUtils: utils,
});
1794
1981
 
1795
- const isTextNode = (node) => {
1796
- return node instanceof TextNode;
1797
- };
1798
- const isHTMLElement = (node) => {
1799
- return node instanceof HTMLElement;
1800
- };
1801
- const isCommentNode = (node) => {
1802
- return node instanceof CommentNode;
1803
- };
1804
- const nodeTagIs = (node, name) => {
1805
- return isHTMLElement(node) && node.tagName?.toLowerCase() === name.toLowerCase();
1806
- };
1807
- const nodeTagIn = (node, names) => {
1808
- return isHTMLElement(node) && names.includes(node.tagName?.toLowerCase());
1809
- };
1810
- const isTextCE = (ce) => {
1811
- return ce?.type === 'text';
1812
- };
1813
- const decodeHTMLEntities = (str) => decode(str);
1814
- const htmlToText = (html, parseOptions) => {
1815
- if (!html)
1816
- return '';
1817
- const doc = parse(html, parseOptions);
1818
- return decodeHTMLEntities(doc.innerText);
1819
- };
1820
- const getHTMLElementAttribute = (e, key) => {
1821
- const value = e.getAttribute(key);
1822
- if (value)
1823
- return value;
1824
- return new URLSearchParams(e.rawAttrs.replaceAll(' ', '&')).get(key);
1982
/**
 * Builds a reference object that points at another entity.
 *
 * @param ref - Referent descriptor; its `id` becomes the `_id` of the reference
 * @returns Object with `_id`, `type: 'reference'` and a shallow copy of `ref` as `referent`
 */
const reference = (ref) => {
    const { id } = ref;
    const referent = { ...ref };
    return { _id: id, type: 'reference', referent };
};
1826
1991
 
1827
- var html_utils = /*#__PURE__*/Object.freeze({
1992
// Frozen, prototype-less namespace exposing the ANS helper functions.
var ANS = /*#__PURE__*/Object.freeze({
    __proto__: null,
    reference,
});
1839
1996
 
1997
/**
 * Derives an id from `identifier`, namespaced by `orgHostname`.
 *
 * A v5 UUID namespace is computed from the hostname (inside the DNS namespace), the
 * identifier is hashed within that namespace into a 16-byte buffer, and the bytes are
 * encoded with the RFC4648 variant without padding.
 *
 * @param identifier - Value the id is derived from
 * @param orgHostname - Value the UUID namespace is derived from
 * @returns The encoded id
 */
const generateArcId = (identifier, orgHostname) => {
    const bytes = v5(identifier, v5(orgHostname, v5.DNS), Buffer.alloc(16));
    return encode(bytes, 'RFC4648', { padding: false });
};
1840
2002
  /**
1841
- * HTMLProcessor is responsible for parsing HTML content into structured content elements.
1842
- * It provides a flexible way to handle different HTML nodes and wrap text content.
1843
- *
1844
- * The processor can be extended with custom handlers for specific node types and
1845
- * wrappers for text content.
2003
+ * Utility class for generating Arc IDs and source IDs
1846
2004
  *
1847
2005
  * @example
1848
2006
  * ```ts
1849
- * // Create and initialize processor
1850
- * const processor = new HTMLProcessor();
1851
- * processor.init();
1852
- *
1853
- * // Parse HTML content
1854
- * const html = '<div><p>Some text</p><img src="image.jpg"></div>';
1855
- * const elements = await processor.parse(html);
2007
+ * const generator = new IdGenerator(['my-org']);
2008
+ * const arcId = generator.getArcId('123'); // Generates a unique for 'my-org' Arc ID
2009
+ * const sourceId = generator.getSourceId('123', ['my-site']); // Generates 'my-site-123'
1856
2010
  * ```
1857
- *
1858
- * The processor comes with built-in handlers for common HTML elements like links,
1859
- * text formatting (i, u, strong), and block elements. Custom handlers can be added
1860
- * using the `handle()` and `wrap()` methods.
1861
2011
  */
1862
- class HTMLProcessor {
1863
- constructor() {
1864
- this.parallelProcessing = true;
1865
- this.handlers = {
1866
- node: new Map(),
1867
- wrap: new Map(),
1868
- };
1869
- }
1870
- init() {
1871
- // wrappers are used to wrap the content of nested text nodes
1872
- // in a specific way
1873
- this.wrap('link', (node, text) => {
1874
- if (nodeTagIn(node, ['a'])) {
1875
- const attributes = ['href', 'target', 'rel']
1876
- .map((attr) => [attr, getHTMLElementAttribute(node, attr)])
1877
- .filter(([_, value]) => value)
1878
- .map(([key, value]) => `${key}="${value}"`)
1879
- .join(' ');
1880
- return {
1881
- ...text,
1882
- content: `<a ${attributes}>${text.content}</a>`,
1883
- };
1884
- }
1885
- });
1886
- this.wrap('i', (node, text) => {
1887
- if (nodeTagIn(node, ['i'])) {
1888
- return {
1889
- ...text,
1890
- content: `<i>${text.content}</i>`,
1891
- };
1892
- }
1893
- });
1894
- this.wrap('u', (node, text) => {
1895
- if (nodeTagIn(node, ['u'])) {
1896
- return {
1897
- ...text,
1898
- content: `<u>${text.content}</u>`,
1899
- };
1900
- }
1901
- });
1902
- this.wrap('sup/sub', (node, text) => {
1903
- if (nodeTagIn(node, ['sup', 'sub'])) {
1904
- return {
1905
- ...text,
1906
- content: `<mark class="${node.tagName.toLowerCase()}">${text.content}</mark>`,
1907
- };
1908
- }
1909
- });
1910
- this.wrap('strong', (node, text) => {
1911
- if (nodeTagIn(node, ['strong', 'b'])) {
1912
- return {
1913
- ...text,
1914
- content: `<b>${text.content}</b>`,
1915
- };
1916
- }
1917
- });
1918
- this.wrap('center', (node, text) => {
1919
- if (nodeTagIn(node, ['center'])) {
1920
- return {
1921
- ...text,
1922
- alignment: 'center',
1923
- };
1924
- }
1925
- });
1926
- this.wrap('aligned-paragraph', (node, text) => {
1927
- if (nodeTagIn(node, ['p'])) {
1928
- const styleAttribute = getHTMLElementAttribute(node, 'style') || '';
1929
- if (!styleAttribute)
1930
- return text;
1931
- if (styleAttribute.includes('text-align: right;')) {
1932
- return {
1933
- ...text,
1934
- alignment: 'right',
1935
- };
1936
- }
1937
- if (styleAttribute.includes('text-align: left;')) {
1938
- return {
1939
- ...text,
1940
- alignment: 'left',
1941
- };
1942
- }
1943
- if (styleAttribute.includes('text-align: center;')) {
1944
- return {
1945
- ...text,
1946
- alignment: 'center',
1947
- };
1948
- }
1949
- return text;
1950
- }
1951
- });
1952
- // handlers are used to handle specific nodes
1953
- // and return a list of content elements
1954
- this.handle('default', (node) => {
1955
- const noTag = isHTMLElement(node) && !node.tagName;
1956
- if (noTag ||
1957
- nodeTagIn(node, [
1958
- 'p',
1959
- 'a',
1960
- 'b',
1961
- 'sup',
1962
- 'sub',
1963
- 'span',
1964
- 'strong',
1965
- 'em',
1966
- 'i',
1967
- 'u',
1968
- 'section',
1969
- 'main',
1970
- 'div',
1971
- 'li',
1972
- 'center',
1973
- ])) {
1974
- return this.handleNested(node);
1975
- }
1976
- });
1977
- this.handle('headers', (node) => {
1978
- if (nodeTagIn(node, ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])) {
1979
- return this.createHeader(node);
1980
- }
1981
- });
1982
- this.handle('text', (node) => {
1983
- if (isTextNode(node)) {
1984
- return this.createText(node);
1985
- }
1986
- });
1987
- this.handle('comment', (node) => {
1988
- if (isCommentNode(node)) {
1989
- return this.handleComment(node);
1990
- }
1991
- });
1992
- this.handle('list', async (node) => {
1993
- if (nodeTagIn(node, ['ul', 'ol'])) {
1994
- const listType = node.tagName === 'UL' ? 'unordered' : 'ordered';
1995
- return this.createList(node, listType);
1996
- }
1997
- });
1998
- this.handle('table', (node) => {
1999
- if (nodeTagIs(node, 'table')) {
2000
- return this.handleTable(node);
2001
- }
2002
- });
2003
- this.handle('iframe', (node) => {
2004
- if (nodeTagIs(node, 'iframe')) {
2005
- return this.handleIframe(node);
2006
- }
2007
- });
2008
- this.handle('img', (node) => {
2009
- if (nodeTagIs(node, 'img')) {
2010
- return this.handleImage(node);
2011
- }
2012
- });
2013
- this.handle('br', (node) => {
2014
- if (nodeTagIs(node, 'br')) {
2015
- return this.handleBreak(node);
2016
- }
2017
- });
2018
- }
2019
- handle(name, handler) {
2020
- if (this.handlers.node.has(name)) {
2021
- this.warn({ name }, `${name} node handler already set`);
2012
/**
 * Generates Arc ids and source ids scoped to a namespace.
 *
 * @example
 * const generator = new IdGenerator(['my-org']);
 * const arcId = generator.getArcId('123'); // Arc id derived from '123' within the 'my-org' namespace
 * const sourceId = generator.getSourceId('123', ['my-site']); // 'my-site-123'
 */
class IdGenerator {
    /**
     * @param namespaces - Non-empty array of namespace parts; they are joined with '-'
     * @throws {Error} When `namespaces` is not an array or is empty
     */
    constructor(namespaces) {
        // Check the type as well as the length so that a missing or non-array argument
        // reports the documented error instead of an unrelated TypeError.
        if (!Array.isArray(namespaces) || namespaces.length === 0) {
            throw new Error('At least 1 namespace is required');
        }
        this.namespace = namespaces.join('-');
    }
    /**
     * @param id - Source identifier; converted with `toString()`
     * @returns The Arc id generated for `id` within this generator's namespace
     */
    getArcId(id) {
        return generateArcId(id.toString(), this.namespace);
    }
    /**
     * @param id - Source identifier
     * @param prefixes - Optional parts placed in front of the id
     * @returns The prefixes and the id joined with '-'
     */
    getSourceId(id, prefixes = []) {
        return [...prefixes, id].join('-');
    }
}
2026
+
2027
// Frozen, prototype-less namespace exposing the id generation helpers.
var Id = /*#__PURE__*/Object.freeze({
    __proto__: null,
    IdGenerator,
    generateArcId,
});
2032
+
2033
/**
 * Builds a tree of sections from a flat, ordered list of rows.
 *
 * Each item declares its depth through a key of the form `N<level>` (`N1`, `N2`, ...)
 * that holds a truthy value. An item is attached to the most recent item of the
 * current branch that sits one level above it; level-1 items hang off a synthetic root.
 *
 * @param items - Ordered rows; each needs an `id` and one truthy `N<level>` key
 * @returns The level-1 nodes; every node has `id`, `parent`, `children` and `meta` (the source row)
 * @throws {Error} When an item has no valid level key, or no parent exists on the current branch
 */
const buildTree = (items) => {
    const root = {
        id: '/',
        children: [],
        // The root is synthetic and has no source row, so any read of its meta fails loudly.
        meta: new Proxy({}, {
            get: () => {
                throw new Error('Root node meta is not accessible');
            },
        }),
        parent: null,
    };
    // branch[level] is the node of the current branch at that depth; index 0 is the root.
    const branch = [root];
    for (const item of items) {
        // The level comes from the first truthy key that is `N` followed by a positive
        // integer. Other keys that merely start with `N` (such as `Name`) are skipped.
        let level = 0;
        for (const key of Object.keys(item)) {
            if (!key.startsWith('N') || !item[key])
                continue;
            const depth = Number(key.slice(1));
            if (Number.isInteger(depth) && depth > 0) {
                level = depth;
                break;
            }
        }
        if (!level) {
            throw new Error(`Invalid level for section ${item.id}`);
        }
        const parentNode = branch[level - 1];
        if (!parentNode) {
            throw new Error(`Parent node not found for section ${item.id}`);
        }
        const node = {
            id: item.id,
            parent: parentNode,
            children: [],
            meta: item,
        };
        parentNode.children.push(node);
        branch[level] = node;
        // Forget deeper nodes of the previous branch so that a later row which skips
        // a level is rejected instead of being attached to a stale parent.
        branch.length = level + 1;
    }
    // return root nodes children
    return root.children;
};
2080
/**
 * Lists every node of the given trees in depth-first, parent-before-children order.
 *
 * @param tree - Root nodes; each node exposes its sub-nodes through `children`
 * @returns Flat array containing the same node objects
 */
const flattenTree = (tree) => {
    const ordered = [];
    // Explicit stack; entries are pushed in reverse so they are popped in their original order.
    const pending = [...tree].reverse();
    while (pending.length > 0) {
        const current = pending.pop();
        ordered.push(current);
        const kids = [...current.children];
        for (let i = kids.length - 1; i >= 0; i -= 1) {
            pending.push(kids[i]);
        }
    }
    return ordered;
};
+ const buildAndFlattenTree = (items) => flattenTree(buildTree(items));
2095
+ const groupByWebsites = (sections) => {
2096
+ return sections.reduce((acc, section) => {
2097
+ const website = section._website;
2098
+ if (!acc[website])
2099
+ acc[website] = [];
2100
+ acc[website].push(section);
2101
+ return acc;
2102
+ }, {});
2103
+ };
2104
+ const references = (sections) => {
2105
+ return sections.map((s) => reference({
2106
+ id: s._id,
2107
+ website: s._website,
2108
+ type: 'section',
2109
+ }));
2110
+ };
2111
+ const isReference = (section) => {
2112
+ return section?.type === 'reference' && section?.referent?.type === 'section';
2113
+ };
2114
+ const removeDuplicates = (sections) => {
2115
+ const map = new Map();
2116
+ sections.forEach((s) => {
2117
+ if (isReference(s)) {
2118
+ map.set(`${s.referent.id}${s.referent.website}`, s);
2085
2119
  }
2086
- const children = [];
2087
- for (const child of node.childNodes) {
2088
- children.push(await this.process(child));
2120
+ else {
2121
+ map.set(`${s._id}${s._website}`, s);
2089
2122
  }
2090
- return children;
2123
+ });
2124
+ return [...map.values()];
2125
+ };
2126
+ class SectionsRepository {
2127
+ constructor(arc) {
2128
+ this.arc = arc;
2129
+ this.sectionsByWebsite = {};
2130
+ this.websitesAreLoaded = false;
2091
2131
  }
2092
- /**
2093
- * Processes a single HTML node and converts it into content elements.
2094
- * This method iterates through registered node handlers and attempts to process the node.
2095
- * If a handler successfully processes the node, it returns an array of content elements.
2096
- *
2097
- * @param node - The HTML node to process
2098
- * @returns Promise resolving to an array of content elements, or undefined if node cannot be processed
2099
- */
2100
- async process(node) {
2101
- let isKnownNode = false;
2102
- const elements = [];
2103
- for (const [name, handler] of this.handlers.node.entries()) {
2104
- try {
2105
- const result = await handler(node);
2106
- if (result) {
2107
- // if handler returns an array of elements, it means that the node was handled properly, even if there is no elements inside
2108
- isKnownNode = true;
2109
- elements.push(...result);
2110
- break;
2111
- }
2112
- }
2113
- catch (error) {
2114
- this.warn({ node: node.toString(), error: error.toString(), name }, 'HandlerError');
2115
- }
2116
- }
2117
- if (isKnownNode)
2118
- return elements;
2119
- this.warn({ node: node.toString() }, 'UnknownNodeError');
2132
+ async put(ans) {
2133
+ await this.arc.Site.putSection(ans);
2134
+ const created = await this.arc.Site.getSection(ans._id, ans.website);
2135
+ this.save(created);
2120
2136
  }
2121
- /**
2122
- * Merges adjacent text content elements into a single paragraph.
2123
- * This method iterates through an array of content elements and combines
2124
- * adjacent text elements into a single paragraph.
2125
- *
2126
- * @param items - The array of content elements to merge
2127
- **/
2128
- mergeParagraphs(items) {
2129
- const merged = [];
2130
- let toMerge = [];
2131
- const merge = () => {
2132
- if (!toMerge.length)
2133
- return;
2134
- const paragraph = toMerge.reduce((acc, p) => {
2135
- return {
2136
- ...p,
2137
- content: acc.content + p.content,
2138
- };
2139
- }, { type: 'text', content: '' });
2140
- merged.push(paragraph);
2141
- toMerge = [];
2142
- };
2143
- for (let i = 0; i < items.length; i++) {
2144
- const item = items[i];
2145
- const isBlockElement = item.additional_properties?.isBlockElement;
2146
- if (isTextCE(item) && !isBlockElement) {
2147
- toMerge.push(item);
2137
+ async loadWebsite(website) {
2138
+ const sections = [];
2139
+ let next = true;
2140
+ let offset = 0;
2141
+ while (next) {
2142
+ const migrated = await this.arc.Site.getSections({ website, offset }).catch((_) => {
2143
+ return { q_results: [] };
2144
+ });
2145
+ if (migrated.q_results.length) {
2146
+ sections.push(...migrated.q_results);
2147
+ offset += migrated.q_results.length;
2148
2148
  }
2149
2149
  else {
2150
- merge();
2151
- merged.push(item);
2150
+ next = false;
2152
2151
  }
2153
2152
  }
2154
- merge();
2155
- return merged;
2156
- }
2157
- handleComment(_) {
2158
- return [];
2159
- }
2160
- async handleTable(node) {
2161
- return [ContentElement.raw_html(node.toString())];
2162
- }
2163
- async handleIframe(node) {
2164
- return [ContentElement.raw_html(node.toString())];
2165
- }
2166
- async handleImage(node) {
2167
- return [ContentElement.raw_html(node.toString())];
2168
- }
2169
- async handleBreak(_) {
2170
- return [ContentElement.divider()];
2171
- }
2172
- async createQuote(node) {
2173
- const items = await this.handleNested(node);
2174
- return [ContentElement.quote(items)];
2153
+ return sections;
2175
2154
  }
2176
- async createText(node) {
2177
- const text = ContentElement.text(node.text);
2178
- return [text];
2155
+ async loadWebsites(websites) {
2156
+ for (const website of websites) {
2157
+ this.sectionsByWebsite[website] = await this.loadWebsite(website);
2158
+ }
2159
+ this.websitesAreLoaded = true;
2179
2160
  }
2180
- filterListItems(items) {
2181
- return items.filter((i) => ['text', 'list'].includes(i.type));
2161
+ save(section) {
2162
+ const website = section._website;
2163
+ assert.ok(website, 'Section must have a website');
2164
+ this.sectionsByWebsite[website] = this.sectionsByWebsite[website] || [];
2165
+ if (!this.sectionsByWebsite[website].find((s) => s._id === section._id)) {
2166
+ this.sectionsByWebsite[website].push(section);
2167
+ }
2182
2168
  }
2183
- async createList(node, type) {
2184
- const items = await this.handleNested(node);
2185
- return [ContentElement.list(type, this.filterListItems(items))];
2169
+ getById(id, website) {
2170
+ this.ensureWebsitesLoaded();
2171
+ const section = this.sectionsByWebsite[website]?.find((s) => s._id === id);
2172
+ return section;
2186
2173
  }
2187
- async createHeader(node) {
2188
- const level = +node.tagName.split('H')[1] || 3;
2189
- return [ContentElement.header(node.innerText, level)];
2174
+ getByWebsite(website) {
2175
+ this.ensureWebsitesLoaded();
2176
+ return this.sectionsByWebsite[website];
2190
2177
  }
2191
- isBlockElement(node) {
2192
- if (!isHTMLElement(node))
2193
- return false;
2194
- const defaultBlockElements = new Set(BLOCK_ELEMENT_TAGS);
2195
- return defaultBlockElements.has(node.tagName);
2178
+ getParentSections(section) {
2179
+ this.ensureWebsitesLoaded();
2180
+ const parents = [];
2181
+ let current = section;
2182
+ while (current.parent?.default && current.parent.default !== '/') {
2183
+ const parent = this.getById(current.parent.default, section._website);
2184
+ if (!parent)
2185
+ break;
2186
+ parents.push(parent);
2187
+ current = parent;
2188
+ }
2189
+ return parents;
2196
2190
  }
2197
- warn(metadata, message) {
2198
- console.warn(metadata, message);
2191
+ ensureWebsitesLoaded() {
2192
+ assert.ok(this.websitesAreLoaded, 'call .loadWebsites() first');
2199
2193
  }
2200
2194
  }
2201
2195
 
2202
- var index$1 = /*#__PURE__*/Object.freeze({
2196
+ var Section = /*#__PURE__*/Object.freeze({
2203
2197
  __proto__: null,
2204
- Constants: html_constants,
2205
- HTMLProcessor: HTMLProcessor,
2206
- Utils: html_utils
2198
+ SectionsRepository: SectionsRepository,
2199
+ buildAndFlattenTree: buildAndFlattenTree,
2200
+ buildTree: buildTree,
2201
+ flattenTree: flattenTree,
2202
+ groupByWebsites: groupByWebsites,
2203
+ isReference: isReference,
2204
+ references: references,
2205
+ removeDuplicates: removeDuplicates
2207
2206
  });
2208
2207
 
2209
- var index = /*#__PURE__*/Object.freeze({
2210
- __proto__: null,
2211
- ContentElement: ContentElement,
2212
- HTML: index$1
2213
- });
2208
+ const ArcUtils = {
2209
+ Id,
2210
+ ANS,
2211
+ ContentElements,
2212
+ Section,
2213
+ };
2214
2214
 
2215
- export { index$2 as AnsMapper, ArcAPI, ArcError, index$3 as ArcTypes, ArcUtils, index as ContentElements, WsClient, ArcAPI as default };
2215
+ export { index$1 as AnsMapper, ArcAPI, ArcError, index as ArcTypes, ArcUtils, index$2 as ContentElements, WsClient, ArcAPI as default };
2216
2216
  //# sourceMappingURL=index.js.map