@vannizhang/living-atlas-content-validator 1.5.17 → 1.5.18-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/README.md +1376 -162
  2. package/dist/configureSettings.d.ts +133 -0
  3. package/dist/configureSettings.js +61 -0
  4. package/dist/configureSettings.js.map +1 -0
  5. package/dist/data/TitleSummaryMatchingPatterns_Locations.json +829 -0
  6. package/dist/data/TitleSummaryMatchingPatterns_Sources.json +382 -0
  7. package/dist/data/TitleSummaryMatchingPatterns_Time.json +25 -0
  8. package/dist/data/TitleSummaryMatchingPatterns_Topics.json +1365 -0
  9. package/dist/data/TitleSummaryRejectedPatterns.json +44 -0
  10. package/dist/index.d.ts +27 -9
  11. package/dist/index.js +55 -9
  12. package/dist/index.js.map +1 -1
  13. package/dist/lib/accessInformation/isValidAccessInformation.js +4 -6
  14. package/dist/lib/accessInformation/isValidAccessInformation.js.map +1 -1
  15. package/dist/lib/checkProfanities/checkProfanities.d.ts +7 -0
  16. package/dist/lib/checkProfanities/checkProfanities.js +26 -0
  17. package/dist/lib/checkProfanities/checkProfanities.js.map +1 -0
  18. package/dist/lib/checkTitleAndSnippetSearchability/checkTitleAndSnippetSearchability.d.ts +34 -0
  19. package/dist/lib/{recommendedText/checkRecommendedText.js → checkTitleAndSnippetSearchability/checkTitleAndSnippetSearchability.js} +51 -25
  20. package/dist/lib/checkTitleAndSnippetSearchability/checkTitleAndSnippetSearchability.js.map +1 -0
  21. package/dist/lib/checkTitleAndSnippetSearchability/config.d.ts +97 -0
  22. package/dist/lib/checkTitleAndSnippetSearchability/config.js +41 -0
  23. package/dist/lib/checkTitleAndSnippetSearchability/config.js.map +1 -0
  24. package/dist/lib/checkTitleAndSnippetSearchability/data.d.ts +0 -0
  25. package/dist/lib/checkTitleAndSnippetSearchability/data.js +693 -0
  26. package/dist/lib/checkTitleAndSnippetSearchability/data.js.map +1 -0
  27. package/dist/lib/checkTitleAndSnippetSearchability/helpers.d.ts +117 -0
  28. package/dist/lib/checkTitleAndSnippetSearchability/helpers.js +241 -0
  29. package/dist/lib/checkTitleAndSnippetSearchability/helpers.js.map +1 -0
  30. package/dist/lib/checkTitleAndSnippetSearchability/matchDateTimeInfo.d.ts +34 -0
  31. package/dist/lib/checkTitleAndSnippetSearchability/matchDateTimeInfo.js +207 -0
  32. package/dist/lib/checkTitleAndSnippetSearchability/matchDateTimeInfo.js.map +1 -0
  33. package/dist/lib/checkTitleAndSnippetSearchability/matchLocationInfo.d.ts +34 -0
  34. package/dist/lib/checkTitleAndSnippetSearchability/matchLocationInfo.js +170 -0
  35. package/dist/lib/checkTitleAndSnippetSearchability/matchLocationInfo.js.map +1 -0
  36. package/dist/lib/checkTitleAndSnippetSearchability/matchSourceInfo.d.ts +34 -0
  37. package/dist/lib/checkTitleAndSnippetSearchability/matchSourceInfo.js +172 -0
  38. package/dist/lib/checkTitleAndSnippetSearchability/matchSourceInfo.js.map +1 -0
  39. package/dist/lib/checkTitleAndSnippetSearchability/matchTopicInfo.d.ts +36 -0
  40. package/dist/lib/checkTitleAndSnippetSearchability/matchTopicInfo.js +175 -0
  41. package/dist/lib/checkTitleAndSnippetSearchability/matchTopicInfo.js.map +1 -0
  42. package/dist/lib/checkTitleAndSnippetSearchability/scoringConfig.js.map +1 -0
  43. package/dist/lib/config.d.ts +4 -1
  44. package/dist/lib/config.js +5 -5
  45. package/dist/lib/config.js.map +1 -1
  46. package/dist/lib/deleteProtection/isDeleteProtectionEnabled.js +6 -2
  47. package/dist/lib/deleteProtection/isDeleteProtectionEnabled.js.map +1 -1
  48. package/dist/lib/deprecated/isDeprecated.js +5 -2
  49. package/dist/lib/deprecated/isDeprecated.js.map +1 -1
  50. package/dist/lib/description/isValidDescription.js +8 -5
  51. package/dist/lib/description/isValidDescription.js.map +1 -1
  52. package/dist/lib/licenseInfo/isValidLicenseInfo.js +5 -2
  53. package/dist/lib/licenseInfo/isValidLicenseInfo.js.map +1 -1
  54. package/dist/lib/sharing/isValidAccess.js +5 -2
  55. package/dist/lib/sharing/isValidAccess.js.map +1 -1
  56. package/dist/lib/ssl/isValidSSL.js +15 -6
  57. package/dist/lib/ssl/isValidSSL.js.map +1 -1
  58. package/dist/lib/summary/isValidSummary.js +6 -3
  59. package/dist/lib/summary/isValidSummary.js.map +1 -1
  60. package/dist/lib/tags/isValidTags.js +7 -3
  61. package/dist/lib/tags/isValidTags.js.map +1 -1
  62. package/dist/lib/thumbnail/isValidThumbnail.d.ts +6 -1
  63. package/dist/lib/thumbnail/isValidThumbnail.js +10 -7
  64. package/dist/lib/thumbnail/isValidThumbnail.js.map +1 -1
  65. package/dist/lib/title/isValidTitle.js +9 -3
  66. package/dist/lib/title/isValidTitle.js.map +1 -1
  67. package/dist/lib/userProfileDescription/isValidUserProfileDescription.js +44 -38
  68. package/dist/lib/userProfileDescription/isValidUserProfileDescription.js.map +1 -1
  69. package/dist/lib/userProfileName/isValidUserProfileName.js +8 -5
  70. package/dist/lib/userProfileName/isValidUserProfileName.js.map +1 -1
  71. package/dist/lib/userProfileThumbnail/isValidUserProfileThumbnail.js +6 -3
  72. package/dist/lib/userProfileThumbnail/isValidUserProfileThumbnail.js.map +1 -1
  73. package/dist/lib/util/containsNonEnglishCharacters.d.ts +13 -0
  74. package/dist/lib/util/containsNonEnglishCharacters.js +30 -0
  75. package/dist/lib/util/containsNonEnglishCharacters.js.map +1 -0
  76. package/dist/lib/util/countSentences.d.ts +23 -0
  77. package/dist/lib/util/countSentences.js +54 -0
  78. package/dist/lib/util/countSentences.js.map +1 -0
  79. package/dist/lib/util/createWordBoundaryRegex.d.ts +46 -0
  80. package/dist/lib/util/createWordBoundaryRegex.js +77 -0
  81. package/dist/lib/util/createWordBoundaryRegex.js.map +1 -0
  82. package/dist/lib/util/escapeSpecialCharacters.d.ts +7 -0
  83. package/dist/lib/util/escapeSpecialCharacters.js +22 -0
  84. package/dist/lib/util/escapeSpecialCharacters.js.map +1 -0
  85. package/dist/lib/util/getLivingAtlasSupportedItemTypes.d.ts +21 -0
  86. package/dist/lib/util/getLivingAtlasSupportedItemTypes.js +34 -0
  87. package/dist/lib/util/getLivingAtlasSupportedItemTypes.js.map +1 -0
  88. package/dist/lib/util/getNumberOfWords.js +6 -2
  89. package/dist/lib/util/getNumberOfWords.js.map +1 -1
  90. package/dist/lib/util/getScoringRules.js +6 -0
  91. package/dist/lib/util/getScoringRules.js.map +1 -1
  92. package/dist/lib/util/isLayer.js +3 -2
  93. package/dist/lib/util/isLayer.js.map +1 -1
  94. package/dist/lib/util/isUrl.d.ts +6 -0
  95. package/dist/lib/util/isUrl.js +16 -1
  96. package/dist/lib/util/isUrl.js.map +1 -1
  97. package/dist/lib/util/isValidRegexPattern.d.ts +7 -0
  98. package/dist/lib/util/isValidRegexPattern.js +20 -0
  99. package/dist/lib/util/isValidRegexPattern.js.map +1 -0
  100. package/dist/lib/util/sanitizeTags.d.ts +6 -0
  101. package/dist/lib/util/sanitizeTags.js +16 -0
  102. package/dist/lib/util/sanitizeTags.js.map +1 -0
  103. package/dist/lib/util/shouldValidateByBetaRules.js +6 -1
  104. package/dist/lib/util/shouldValidateByBetaRules.js.map +1 -1
  105. package/dist/lib/util/stringsConfig.d.ts +9 -0
  106. package/dist/lib/util/stringsConfig.js +9 -1
  107. package/dist/lib/util/stringsConfig.js.map +1 -1
  108. package/dist/lib/validate/validate.d.ts +57 -12
  109. package/dist/lib/validate/validate.js +87 -213
  110. package/dist/lib/validate/validate.js.map +1 -1
  111. package/dist/lib/validate/validateHelpers.d.ts +63 -0
  112. package/dist/lib/validate/validateHelpers.js +157 -0
  113. package/dist/lib/validate/validateHelpers.js.map +1 -0
  114. package/dist/locale/de.json +1 -1
  115. package/dist/locale/en.json +23 -23
  116. package/dist/locale/es.json +1 -1
  117. package/dist/locale/fr.json +1 -1
  118. package/dist/locale/ja.json +1 -1
  119. package/dist/locale/pt-br.json +1 -1
  120. package/dist/package-info.json +1 -1
  121. package/dist/services/content-validator-assets/config.d.ts +4 -0
  122. package/dist/services/content-validator-assets/config.js +8 -0
  123. package/dist/services/content-validator-assets/config.js.map +1 -0
  124. package/dist/services/content-validator-assets/fetchAdditonalPatterns4TitleAndSnippetSearchability.d.ts +95 -0
  125. package/dist/services/content-validator-assets/fetchAdditonalPatterns4TitleAndSnippetSearchability.js +92 -0
  126. package/dist/services/content-validator-assets/fetchAdditonalPatterns4TitleAndSnippetSearchability.js.map +1 -0
  127. package/dist/services/content-validator-assets/fetchProfanitiesData.d.ts +18 -0
  128. package/dist/services/content-validator-assets/fetchProfanitiesData.js +55 -0
  129. package/dist/services/content-validator-assets/fetchProfanitiesData.js.map +1 -0
  130. package/dist/services/content-validator-assets/helpers.d.ts +16 -0
  131. package/dist/services/content-validator-assets/helpers.js +35 -0
  132. package/dist/services/content-validator-assets/helpers.js.map +1 -0
  133. package/dist/services/custom-terms/config.d.ts +27 -0
  134. package/dist/services/custom-terms/config.js +79 -0
  135. package/dist/services/custom-terms/config.js.map +1 -0
  136. package/dist/services/custom-terms/customTerms.d.ts +229 -0
  137. package/dist/services/custom-terms/customTerms.js +394 -0
  138. package/dist/services/custom-terms/customTerms.js.map +1 -0
  139. package/dist/services/custom-terms/helpers.d.ts +8 -0
  140. package/dist/services/custom-terms/helpers.js +25 -0
  141. package/dist/services/custom-terms/helpers.js.map +1 -0
  142. package/dist/services/custom-terms/index.d.ts +3 -0
  143. package/dist/services/custom-terms/index.js +10 -0
  144. package/dist/services/custom-terms/index.js.map +1 -0
  145. package/dist/services/custom-terms-review-results/config.d.ts +31 -0
  146. package/dist/services/custom-terms-review-results/config.js +78 -0
  147. package/dist/services/custom-terms-review-results/config.js.map +1 -0
  148. package/dist/services/custom-terms-review-results/customTermsReviewResults.d.ts +133 -0
  149. package/dist/services/custom-terms-review-results/customTermsReviewResults.js +276 -0
  150. package/dist/services/custom-terms-review-results/customTermsReviewResults.js.map +1 -0
  151. package/dist/services/custom-terms-review-results/helpers.d.ts +24 -0
  152. package/dist/services/custom-terms-review-results/helpers.js +52 -0
  153. package/dist/services/custom-terms-review-results/helpers.js.map +1 -0
  154. package/dist/services/custom-terms-review-results/index.d.ts +4 -0
  155. package/dist/services/custom-terms-review-results/index.js +13 -0
  156. package/dist/services/custom-terms-review-results/index.js.map +1 -0
  157. package/dist/services/shared/addFeatures.d.ts +28 -0
  158. package/dist/services/shared/addFeatures.js +52 -0
  159. package/dist/services/shared/addFeatures.js.map +1 -0
  160. package/dist/services/shared/applyEdits.d.ts +28 -0
  161. package/dist/services/shared/applyEdits.js +53 -0
  162. package/dist/services/shared/applyEdits.js.map +1 -0
  163. package/dist/services/shared/config.d.ts +44 -0
  164. package/dist/services/shared/config.js +35 -0
  165. package/dist/services/shared/config.js.map +1 -0
  166. package/dist/services/shared/getItemInfo.d.ts +36 -0
  167. package/dist/services/shared/getItemInfo.js +56 -0
  168. package/dist/services/shared/getItemInfo.js.map +1 -0
  169. package/dist/types/index.d.ts +25 -22
  170. package/package.json +4 -3
  171. package/dist/__tests__/test-data/title-summary-data.json +0 -14654
  172. package/dist/data/TitleSummaryMatchingPatterns.json +0 -1902
  173. package/dist/lib/layers/isValidLayerCount.d.ts +0 -12
  174. package/dist/lib/layers/isValidLayerCount.js +0 -171
  175. package/dist/lib/layers/isValidLayerCount.js.map +0 -1
  176. package/dist/lib/layers/scoringConfig.d.ts +0 -10
  177. package/dist/lib/layers/scoringConfig.js +0 -20
  178. package/dist/lib/layers/scoringConfig.js.map +0 -1
  179. package/dist/lib/recommendedText/checkRecommendedText.d.ts +0 -15
  180. package/dist/lib/recommendedText/checkRecommendedText.js.map +0 -1
  181. package/dist/lib/recommendedText/helpers.d.ts +0 -15
  182. package/dist/lib/recommendedText/helpers.js +0 -62
  183. package/dist/lib/recommendedText/helpers.js.map +0 -1
  184. package/dist/lib/recommendedText/matchDateTimeInfo.d.ts +0 -9
  185. package/dist/lib/recommendedText/matchDateTimeInfo.js +0 -81
  186. package/dist/lib/recommendedText/matchDateTimeInfo.js.map +0 -1
  187. package/dist/lib/recommendedText/matchLocationInfo.d.ts +0 -9
  188. package/dist/lib/recommendedText/matchLocationInfo.js +0 -745
  189. package/dist/lib/recommendedText/matchLocationInfo.js.map +0 -1
  190. package/dist/lib/recommendedText/matchSourceInfo.d.ts +0 -9
  191. package/dist/lib/recommendedText/matchSourceInfo.js +0 -32
  192. package/dist/lib/recommendedText/matchSourceInfo.js.map +0 -1
  193. package/dist/lib/recommendedText/matchTopicInfo.d.ts +0 -9
  194. package/dist/lib/recommendedText/matchTopicInfo.js +0 -32
  195. package/dist/lib/recommendedText/matchTopicInfo.js.map +0 -1
  196. package/dist/lib/recommendedText/scoringConfig.js.map +0 -1
  197. /package/dist/lib/{recommendedText → checkTitleAndSnippetSearchability}/scoringConfig.d.ts +0 -0
  198. /package/dist/lib/{recommendedText → checkTitleAndSnippetSearchability}/scoringConfig.js +0 -0
@@ -0,0 +1,34 @@
1
+ import { IItem } from '../../types';
2
+ import { MatchResult } from './helpers';
3
+ /**
4
+ * Sets extra matching and rejected patterns for locations and updates the corresponding regex and sets.
5
+ *
6
+ * @param extraMatchingPatternsForLocations - An array of strings representing additional patterns to match locations.
7
+ * @param extraRejectedPatternsForLocations - An array of strings representing additional patterns to reject locations.
8
+ * @param {boolean} [shouldAvoidUsingWordBoundary=false] - A boolean indicating whether to avoid using word boundaries (useful for languages like Chinese and Japanese).
9
+ *
10
+ * @remarks
11
+ * - For languages like Japanese that don't use spaces between words (or where word boundaries are not easily determined by spaces or punctuation),
12
+ * set `shouldAvoidUsingWordBoundary` to true to remove `\b` and use lookarounds to ensure correct boundaries.
13
+ */
14
+ export declare const setExtraMatchingPatternsForLocations: (extraMatchingPatternsForLocations?: string[], extraRejectedPatternsForLocations?: string[], shouldAvoidUsingWordBoundary?: boolean) => void;
15
+ /**
16
+ * Matches location information from an item's title and snippet using predefined or custom matching patterns.
17
+ *
18
+ * @param {IItem} item - The item containing title and snippet to search for location information.
19
+ * @param {string[]} [customMatchingPattenrs] - Optional custom matching patterns to be included in the search.
20
+ * @returns {MatchResult[]} - An array of matched results for location info after deduplication and removing overlaps.
21
+ */
22
+ export declare const matchLocationInfo: (item: IItem, customMatchingPattenrs?: string[]) => MatchResult[];
23
+ /**
24
+ * Determines if a given string is a recognized location information.
25
+ * @param str - The string to check.
26
+ * @returns {boolean} - True if the string is a recognized location, false otherwise.
27
+ */
28
+ export declare const isRecognizedLocation: (str: string) => boolean;
29
+ /**
30
+ * Determines if a given string is a rejected location information.
31
+ * @param str - The string to check.
32
+ * @returns {boolean} - True if the string is a rejected location information, false otherwise.
33
+ */
34
+ export declare const isRejectedLocationInfo: (str: string) => boolean;
@@ -0,0 +1,170 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.isRejectedLocationInfo = exports.isRecognizedLocation = exports.matchLocationInfo = exports.setExtraMatchingPatternsForLocations = void 0;
7
+ const helpers_1 = require("./helpers");
8
+ const TitleSummaryMatchingPatterns_Locations_json_1 = __importDefault(require("../../data/TitleSummaryMatchingPatterns_Locations.json"));
9
+ const TitleSummaryRejectedPatterns_json_1 = __importDefault(require("../../data/TitleSummaryRejectedPatterns.json"));
10
+ const createWordBoundaryRegex_1 = require("../util/createWordBoundaryRegex");
11
+ // const matchingPatternsForLocations = TitleSummaryMatchingPatterns.locations;
12
+ // Combine patterns into one
13
+ // const matchingPatterns = ALL_LOCATIONS.join('|');
14
+ const regexForLocations = (0, createWordBoundaryRegex_1.createWordBoundaryRegex)(TitleSummaryMatchingPatterns_Locations_json_1.default);
15
+ // const combinedRegex = new RegExp(`\\b(${matchingPatterns})\\b`, 'gi');
16
+ // /**
17
+ // * Set of recognized locations.
18
+ // */
19
+ // let locationSet: Set<string> = null;
20
+ /**
21
+ * Set of rejected locations.
22
+ */
23
+ let rejectedLocationSet = null; //new Set<string>(TitleSummaryMatchingPatterns.topics)
24
+ /**
25
+ * Regular expression to match additional location information.
26
+ */
27
+ let regexForExtraLocations = null;
28
+ // /**
29
+ // * Creates a set of recognized locations from predefined and extra matching patterns.
30
+ // * @param extraMatchingPatternsForLocations
31
+ // */
32
+ // const createLocationSet = (
33
+ // extraMatchingPatternsForLocations: string[] = []
34
+ // ) => {
35
+ // locationSet = toLowercaseSet([
36
+ // ...matchingPatternsForLocations,
37
+ // ...extraMatchingPatternsForLocations,
38
+ // ]);
39
+ // };
40
+ /**
41
+ * Creates a set of rejected locations from predefined and extra rejected patterns.
42
+ * @param extraRejectedPatternsForLocations
43
+ */
44
+ const createRejectedLocationSet = (extraRejectedPatternsForLocations = []) => {
45
+ rejectedLocationSet = (0, helpers_1.toLowercaseSet)([
46
+ ...TitleSummaryRejectedPatterns_json_1.default.common,
47
+ ...TitleSummaryRejectedPatterns_json_1.default.locations,
48
+ ...extraRejectedPatternsForLocations,
49
+ ]);
50
+ };
51
+ /**
52
+ * Sets extra matching and rejected patterns for locations and updates the corresponding regex and sets.
53
+ *
54
+ * @param extraMatchingPatternsForLocations - An array of strings representing additional patterns to match locations.
55
+ * @param extraRejectedPatternsForLocations - An array of strings representing additional patterns to reject locations.
56
+ * @param {boolean} [shouldAvoidUsingWordBoundary=false] - A boolean indicating whether to avoid using word boundaries (useful for languages like Chinese and Japanese).
57
+ *
58
+ * @remarks
59
+ * - For languages like Japanese that don't use spaces between words (or where word boundaries are not easily determined by spaces or punctuation),
60
+ * set `shouldAvoidUsingWordBoundary` to true to remove `\b` and use lookarounds to ensure correct boundaries.
61
+ */
62
+ const setExtraMatchingPatternsForLocations = (extraMatchingPatternsForLocations = [], extraRejectedPatternsForLocations = [], shouldAvoidUsingWordBoundary = false) => {
63
+ if (extraMatchingPatternsForLocations.length) {
64
+ regexForExtraLocations = (0, createWordBoundaryRegex_1.createWordBoundaryRegex)(extraMatchingPatternsForLocations, {
65
+ shouldAvoidUsingWordBoundary,
66
+ });
67
+ }
68
+ // createLocationSet(extraMatchingPatternsForLocations);
69
+ createRejectedLocationSet(extraRejectedPatternsForLocations);
70
+ };
71
+ exports.setExtraMatchingPatternsForLocations = setExtraMatchingPatternsForLocations;
72
+ /**
73
+ * Matches location information from an item's title and snippet using predefined or custom matching patterns.
74
+ *
75
+ * @param {IItem} item - The item containing title and snippet to search for location information.
76
+ * @param {string[]} [customMatchingPattenrs] - Optional custom matching patterns to be included in the search.
77
+ * @returns {MatchResult[]} - An array of matched results for location info after deduplication and removing overlaps.
78
+ */
79
+ const matchLocationInfo = (item, customMatchingPattenrs) => {
80
+ // const title = item.title || '';
81
+ // const snippet = item.snippet || '';
82
+ // const matchedLocationsFromTitle: string[] =
83
+ // title.match(combinedRegex) || [];
84
+ // const matchedLocationsFromSnippet: string[] =
85
+ // snippet.match(combinedRegex) || [];
86
+ // if (
87
+ // !matchedLocationsFromTitle.length &&
88
+ // !matchedLocationsFromSnippet.length
89
+ // ) {
90
+ // return matchWithCustomPatterns(
91
+ // item,
92
+ // customMatchingPattenrs,
93
+ // rejectedLocationSet
94
+ // );
95
+ // }
96
+ // return cleanUpMatchedResults([
97
+ // ...matchedLocationsFromTitle,
98
+ // ...matchedLocationsFromSnippet,
99
+ // ]);
100
+ // let matched: string[] = matchPatternsInTitleAndSnippet(
101
+ // item,
102
+ // regexForLocations
103
+ // );
104
+ // if (!matched.length && regexForExtraLocations) {
105
+ // matched = matchPatternsInTitleAndSnippet(item, regexForExtraLocations);
106
+ // }
107
+ // if (!matched.length && customMatchingPattenrs?.length) {
108
+ // if (!rejectedLocationSet) {
109
+ // createRejectedLocationSet();
110
+ // }
111
+ // matched = matchWithCustomPatterns(
112
+ // item,
113
+ // customMatchingPattenrs,
114
+ // rejectedLocationSet
115
+ // );
116
+ // }
117
+ // return matched;
118
+ if (!rejectedLocationSet) {
119
+ createRejectedLocationSet();
120
+ }
121
+ return (0, helpers_1.matchUsingMultiplePatterns)({
122
+ item,
123
+ predefinedPatterns: regexForLocations,
124
+ extraPatterns: regexForExtraLocations,
125
+ customTerms: customMatchingPattenrs,
126
+ rejectedSet: rejectedLocationSet,
127
+ });
128
+ };
129
+ exports.matchLocationInfo = matchLocationInfo;
130
+ /**
131
+ * Determines if a given string is a recognized location information.
132
+ * @param str - The string to check.
133
+ * @returns {boolean} - True if the string is a recognized location, false otherwise.
134
+ */
135
+ const isRecognizedLocation = (str) => {
136
+ // if (!locationSet) {
137
+ // const sources = ALL_LOCATIONS.map((d) => d.toLowerCase());
138
+ // locationSet = new Set<string>(sources);
139
+ // }
140
+ // locationSet = locationSet || toLowercaseSet(matchingPatternsForLocations);
141
+ // if (!locationSet) {
142
+ // createLocationSet();
143
+ // }
144
+ // return isFoundInSet(str, locationSet);
145
+ const locationInfo = (0, exports.matchLocationInfo)({
146
+ title: str,
147
+ snippet: '',
148
+ });
149
+ return locationInfo.length > 0;
150
+ };
151
+ exports.isRecognizedLocation = isRecognizedLocation;
152
+ /**
153
+ * Determines if a given string is a rejected location information.
154
+ * @param str - The string to check.
155
+ * @returns {boolean} - True if the string is a rejected location information, false otherwise.
156
+ */
157
+ const isRejectedLocationInfo = (str) => {
158
+ // if (!rejectedLocationSet) {
159
+ // rejectedLocationSet = toLowercaseSet([
160
+ // ...TitleSummaryRejectedPatterns.common,
161
+ // ...TitleSummaryRejectedPatterns.locations,
162
+ // ]);
163
+ // }
164
+ if (!rejectedLocationSet) {
165
+ createRejectedLocationSet();
166
+ }
167
+ return (0, helpers_1.isFoundInSet)(str, rejectedLocationSet);
168
+ };
169
+ exports.isRejectedLocationInfo = isRejectedLocationInfo;
170
+ //# sourceMappingURL=matchLocationInfo.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"matchLocationInfo.js","sourceRoot":"","sources":["../../../src/lib/checkTitleAndSnippetSearchability/matchLocationInfo.ts"],"names":[],"mappings":";;;;;;AACA,uCAUmB;AACnB,yIAAkG;AAClG,qHAAwF;AACxF,6EAA0E;AAE1E,+EAA+E;AAE/E,4BAA4B;AAC5B,oDAAoD;AAEpD,MAAM,iBAAiB,GAAG,IAAA,iDAAuB,EAAC,qDAA4B,CAAC,CAAC;AAChF,yEAAyE;AAEzE,MAAM;AACN,kCAAkC;AAClC,MAAM;AACN,uCAAuC;AAEvC;;GAEG;AACH,IAAI,mBAAmB,GAAgB,IAAI,CAAC,CAAC,sDAAsD;AAEnG;;GAEG;AACH,IAAI,sBAAsB,GAAW,IAAI,CAAC;AAE1C,MAAM;AACN,wFAAwF;AACxF,8CAA8C;AAC9C,MAAM;AACN,8BAA8B;AAC9B,uDAAuD;AACvD,SAAS;AACT,qCAAqC;AACrC,2CAA2C;AAC3C,gDAAgD;AAChD,UAAU;AACV,KAAK;AAEL;;;GAGG;AACH,MAAM,yBAAyB,GAAG,CAC9B,oCAA8C,EAAE,EAClD,EAAE;IACA,mBAAmB,GAAG,IAAA,wBAAc,EAAC;QACjC,GAAG,2CAA4B,CAAC,MAAM;QACtC,GAAG,2CAA4B,CAAC,SAAS;QACzC,GAAG,iCAAiC;KACvC,CAAC,CAAC;AACP,CAAC,CAAC;AAEF;;;;;;;;;;GAUG;AACI,MAAM,oCAAoC,GAAG,CAChD,oCAA8C,EAAE,EAChD,oCAA8C,EAAE,EAChD,4BAA4B,GAAG,KAAK,EACtC,EAAE;IACA,IAAI,iCAAiC,CAAC,MAAM,EAAE;QAC1C,sBAAsB,GAAG,IAAA,iDAAuB,EAC5C,iCAAiC,EACjC;YACI,4BAA4B;SAC/B,CACJ,CAAC;KACL;IAED,wDAAwD;IACxD,yBAAyB,CAAC,iCAAiC,CAAC,CAAC;AACjE,CAAC,CAAC;AAhBW,QAAA,oCAAoC,wCAgB/C;AAEF;;;;;;GAMG;AACI,MAAM,iBAAiB,GAAG,CAC7B,IAAW,EACX,sBAAiC,EACpB,EAAE;IACf,kCAAkC;IAClC,sCAAsC;IAEtC,8CAA8C;IAC9C,wCAAwC;IACxC,gDAAgD;IAChD,0CAA0C;IAE1C,OAAO;IACP,2CAA2C;IAC3C,0CAA0C;IAC1C,MAAM;IACN,sCAAsC;IACtC,gBAAgB;IAChB,kCAAkC;IAClC,8BAA8B;IAC9B,SAAS;IACT,IAAI;IAEJ,iCAAiC;IACjC,oCAAoC;IACpC,sCAAsC;IACtC,MAAM;IAEN,0DAA0D;IAC1D,YAAY;IACZ,wBAAwB;IACxB,KAAK;IAEL,mDAAmD;IACnD,8EAA8E;IAC9E,IAAI;IAEJ,2DAA2D;IAC3D,kCAAkC;IAClC,uCAAuC;IACvC,QAAQ;IAER,yCAAyC;IACzC,gBAAgB;IAChB,kCAAkC;IAClC,8BAA8B;IAC9B,SAAS;IACT,IAAI;IAEJ,kBAAkB;IAElB,IAAI,CAAC,mBAAmB,EAAE;QACtB,yBAAyB,EAAE,CAAC;KAC/B;IAED,OAAO,IAAA,oCAA0B,EAAC;QAC9B,IAAI;QACJ,kBAAkB,EAAE,iBAAiB;QACrC,aAAa,EAAE,sBAAsB;QACrC,WAAW,EAAE,sBAAsB;QACnC,WAAW,EAAE,mBAAmB;KACnC,CAAC,CAAC;AACP,CAAC,CAAC;AA9DW,QAAA,iBAAiB,qBA8D5B;AAEF;;;;GAIG;AACI,MAAM,oBAAoB,GAAG,CAAC,GAAW,EAAW,EAAE;IACzD,sBAAsB;IACtB,iEAAiE;IAC
jE,8CAA8C;IAC9C,IAAI;IAEJ,6EAA6E;IAE7E,sBAAsB;IACtB,2BAA2B;IAC3B,IAAI;IAEJ,yCAAyC;IAEzC,MAAM,YAAY,GAAG,IAAA,yBAAiB,EAAC;QACnC,KAAK,EAAE,GAAG;QACV,OAAO,EAAE,EAAE;KACL,CAAC,CAAC;IAEZ,OAAO,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;AACnC,CAAC,CAAC;AApBW,QAAA,oBAAoB,wBAoB/B;AAEF;;;;GAIG;AACI,MAAM,sBAAsB,GAAG,CAAC,GAAW,EAAW,EAAE;IAC3D,8BAA8B;IAC9B,6CAA6C;IAC7C,kDAAkD;IAClD,qDAAqD;IACrD,UAAU;IACV,IAAI;IAEJ,IAAI,CAAC,mBAAmB,EAAE;QACtB,yBAAyB,EAAE,CAAC;KAC/B;IAED,OAAO,IAAA,sBAAY,EAAC,GAAG,EAAE,mBAAmB,CAAC,CAAC;AAClD,CAAC,CAAC;AAbW,QAAA,sBAAsB,0BAajC"}
@@ -0,0 +1,34 @@
1
+ import { IItem } from '../../types';
2
+ import { MatchResult } from './helpers';
3
+ /**
4
+ * Sets extra matching patterns for sources and creates corresponding regex and sets.
5
+ *
6
+ * @param extraMatchingPatternsForSources - An array of strings representing extra matching patterns for sources.
7
+ * @param extraRejectedPatternsForSources - An array of strings representing extra rejected patterns for sources.
8
+ * @param {boolean} [shouldAvoidUsingWordBoundary=false] - A boolean indicating whether to avoid using word boundaries (useful for languages like Chinese and Japanese).
9
+ *
10
+ * @remarks
11
+ * - For languages like Japanese that don't use spaces between words (or where word boundaries are not easily determined by spaces or punctuation),
12
+ * set `shouldAvoidUsingWordBoundary` to true to remove `\b` and use lookarounds to ensure correct boundaries.
13
+ */
14
+ export declare const setExtraMatchingPatternsForSources: (extraMatchingPatternsForSources: string[], extraRejectedPatternsForSources: string[], shouldAvoidUsingWordBoundary?: boolean) => void;
15
+ /**
16
+ * Matches source information from an item's title and snippet using predefined or custom matching patterns.
17
+ *
18
+ * @param {IItem} item - The item containing title and snippet to search for matching patterns.
19
+ * @param {string[]} [customMatchingPattenrs] - Optional custom matching patterns to be used in addition to the predefined ones.
20
+ * @returns {MatchResult[]} - An array of matched results for source info after deduplication and removal of overlaps.
21
+ */
22
+ export declare const matchSourceInfo: (item: IItem, customMatchingPattenrs?: string[]) => MatchResult[];
23
+ /**
24
+ * Determines if a given string is a recognized source information.
25
+ * @param str - The string to check.
26
+ * @returns {boolean} - True if the string is a recognized source, false otherwise.
27
+ */
28
+ export declare const isRecognizedSource: (str: string) => boolean;
29
+ /**
30
+ * Determines if a given string is a rejected source information.
31
+ * @param str - The string to check.
32
+ * @returns {boolean} - True if the string is a rejected source information, false otherwise.
33
+ */
34
+ export declare const isRejectedSourceInfo: (str: string) => boolean;
@@ -0,0 +1,172 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.isRejectedSourceInfo = exports.isRecognizedSource = exports.matchSourceInfo = exports.setExtraMatchingPatternsForSources = void 0;
7
+ const helpers_1 = require("./helpers");
8
+ const TitleSummaryMatchingPatterns_Sources_json_1 = __importDefault(require("../../data/TitleSummaryMatchingPatterns_Sources.json"));
9
+ const TitleSummaryRejectedPatterns_json_1 = __importDefault(require("../../data/TitleSummaryRejectedPatterns.json"));
10
+ const createWordBoundaryRegex_1 = require("../util/createWordBoundaryRegex");
11
+ // /**
12
+ // * Pre-defined matching patterns for sources.
13
+ // */
14
+ // const matchingPatternsForSources = TitleSummaryMatchingPatterns.sources;
15
+ /**
16
+ * Regular expression to match source information in a case-insensitive manner.
17
+ * The patterns in TitleSummaryMatchingPatterns are already escaped.
18
+ */
19
+ const regexForSourceInfo = (0, createWordBoundaryRegex_1.createWordBoundaryRegex)(TitleSummaryMatchingPatterns_Sources_json_1.default);
20
+ // const regexForSourceInfo = new RegExp(`\\b(${matchingPatternsForSources})\\b`, 'gi');
21
+ // /**
22
+ // * set of matching patterns for source info
23
+ // */
24
+ // let sourcesSet: Set<string> = null;
25
+ /**
26
+ * Set of rejected sources.
27
+ */
28
+ let rejectedSourcesSet = null;
29
+ /**
30
+ * Regular expression to match additional source information.
31
+ */
32
+ let regexForExtraSources = null;
33
+ // /**
34
+ // * Create a set of recognized sources from predefined and extra matching patterns.
35
+ // * @param extraMatchingPatternsForSources
36
+ // */
37
+ // const createSourcesSet = (extraMatchingPatternsForSources: string[] = []) => {
38
+ // sourcesSet = toLowercaseSet([
39
+ // ...TitleSummaryMatchingPatterns.sources,
40
+ // ...extraMatchingPatternsForSources,
41
+ // ]);
42
+ // };
43
+ /**
44
+ * Creates a set of rejected sources from predefined and extra rejected patterns.
45
+ *
46
+ * @param {string[]} extraRejectedPatternsForSources - Additional rejected patterns to be set.
47
+ */
48
+ const createRejectedSourcesSet = (extraRejectedPatternsForSources = []) => {
49
+ rejectedSourcesSet = (0, helpers_1.toLowercaseSet)([
50
+ ...TitleSummaryRejectedPatterns_json_1.default.common,
51
+ ...TitleSummaryRejectedPatterns_json_1.default.sources,
52
+ ...extraRejectedPatternsForSources,
53
+ ]);
54
+ };
55
+ /**
56
+ * Sets extra matching patterns for sources and creates corresponding regex and sets.
57
+ *
58
+ * @param extraMatchingPatternsForSources - An array of strings representing extra matching patterns for sources.
59
+ * @param extraRejectedPatternsForSources - An array of strings representing extra rejected patterns for sources.
60
+ * @param {boolean} [shouldAvoidUsingWordBoundary=false] - A boolean indicating whether to avoid using word boundaries (useful for languages like Chinese and Japanese).
61
+ *
62
+ * @remarks
63
+ * - For languages like Japanese that don't use spaces between words (or where word boundaries are not easily determined by spaces or punctuation),
64
+ * set `shouldAvoidUsingWordBoundary` to true to remove `\b` and use lookarounds to ensure correct boundaries.
65
+ */
66
+ const setExtraMatchingPatternsForSources = (extraMatchingPatternsForSources, extraRejectedPatternsForSources, shouldAvoidUsingWordBoundary = false) => {
67
+ if (extraMatchingPatternsForSources &&
68
+ extraMatchingPatternsForSources.length > 0) {
69
+ regexForExtraSources = (0, createWordBoundaryRegex_1.createWordBoundaryRegex)(extraMatchingPatternsForSources, {
70
+ shouldAvoidUsingWordBoundary,
71
+ });
72
+ }
73
+ // createSourcesSet(extraMatchingPatternsForSources);
74
+ createRejectedSourcesSet(extraRejectedPatternsForSources);
75
+ };
76
+ exports.setExtraMatchingPatternsForSources = setExtraMatchingPatternsForSources;
77
+ /**
78
+ * Matches source information from an item's title and snippet using predefined or custom matching patterns.
79
+ *
80
+ * @param {IItem} item - The item containing title and snippet to search for matching patterns.
81
+ * @param {string[]} [customMatchingPattenrs] - Optional custom matching patterns to be used in addition to the predefined ones.
82
+ * @returns {MatchResult[]} - An array of matched results for source info after deduplication and removal of overlaps.
83
+ */
84
+ const matchSourceInfo = (item, customMatchingPattenrs) => {
85
+ // const title = item.title || '';
86
+ // const snippet = item.snippet || '';
87
+ // const matchedStrFromTitle: string[] = title.match(regex4SourceInfo) || [];
88
+ // const matchedStrFromSnippet: string[] =
89
+ // snippet.match(regex4SourceInfo) || [];
90
+ // if (!matchedStrFromTitle.length && !matchedStrFromSnippet.length) {
91
+ // return matchWithCustomPatterns(
92
+ // item,
93
+ // customMatchingPattenrs,
94
+ // rejectedSourcesSet
95
+ // );
96
+ // }
97
+ // return cleanUpMatchedResults([
98
+ // ...matchedStrFromTitle,
99
+ // ...matchedStrFromSnippet,
100
+ // ]);
101
+ // let matched: string[] = matchPatternsInTitleAndSnippet(
102
+ // item,
103
+ // regexForSourceInfo
104
+ // );
105
+ // if (!matched.length && regexForExtraSources) {
106
+ // matched = matchPatternsInTitleAndSnippet(item, regexForExtraSources);
107
+ // }
108
+ // if (!matched.length && customMatchingPattenrs?.length) {
109
+ // if (!rejectedSourcesSet) {
110
+ // createRejectedSourcesSet();
111
+ // }
112
+ // matched = matchWithCustomPatterns(
113
+ // item,
114
+ // customMatchingPattenrs,
115
+ // rejectedSourcesSet
116
+ // );
117
+ // }
118
+ // return matched;
119
+ if (!rejectedSourcesSet) {
120
+ createRejectedSourcesSet();
121
+ }
122
+ return (0, helpers_1.matchUsingMultiplePatterns)({
123
+ item,
124
+ predefinedPatterns: regexForSourceInfo,
125
+ extraPatterns: regexForExtraSources,
126
+ customTerms: customMatchingPattenrs,
127
+ rejectedSet: rejectedSourcesSet,
128
+ });
129
+ };
130
+ exports.matchSourceInfo = matchSourceInfo;
131
+ /**
132
+ * Determines if a given string is a recognized source information.
133
+ * @param str - The string to check.
134
+ * @returns {boolean} - True if the string is a recognized source, false otherwise.
135
+ */
136
+ const isRecognizedSource = (str) => {
137
+ // if (!sourcesSet) {
138
+ // const sources = TitleSummaryMatchingPatterns.sources.map((d) =>
139
+ // d.toLowerCase()
140
+ // );
141
+ // sourcesSet = new Set<string>(sources);
142
+ // }
143
+ // sourcesSet =
144
+ // sourcesSet || toLowercaseSet(TitleSummaryMatchingPatterns?.sources);
145
+ // if (!sourcesSet) {
146
+ // createSourcesSet();
147
+ // }
148
+ // return isFoundInSet(str, sourcesSet);
149
+ const sourceInfo = (0, exports.matchSourceInfo)({
150
+ title: str,
151
+ snippet: '',
152
+ });
153
+ return sourceInfo.length > 0;
154
+ };
155
+ exports.isRecognizedSource = isRecognizedSource;
156
+ /**
157
+ * Determines if a given string is a rejected source information.
158
+ * @param str - The string to check.
159
+ * @returns {boolean} - True if the string is a rejected source information, false otherwise.
160
+ */
161
+ const isRejectedSourceInfo = (str) => {
162
+ if (!rejectedSourcesSet) {
163
+ // rejectedSourcesSet = toLowercaseSet([
164
+ // ...TitleSummaryRejectedPatterns.common,
165
+ // ...TitleSummaryRejectedPatterns.sources,
166
+ // ]);
167
+ createRejectedSourcesSet();
168
+ }
169
+ return (0, helpers_1.isFoundInSet)(str, rejectedSourcesSet);
170
+ };
171
+ exports.isRejectedSourceInfo = isRejectedSourceInfo;
172
+ //# sourceMappingURL=matchSourceInfo.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"matchSourceInfo.js","sourceRoot":"","sources":["../../../src/lib/checkTitleAndSnippetSearchability/matchSourceInfo.ts"],"names":[],"mappings":";;;;;;AACA,uCAUmB;AACnB,qIAA8F;AAC9F,qHAAwF;AACxF,6EAA0E;AAE1E,MAAM;AACN,gDAAgD;AAChD,MAAM;AACN,2EAA2E;AAE3E;;;GAGG;AACH,MAAM,kBAAkB,GAAG,IAAA,iDAAuB,EAAC,mDAA0B,CAAC,CAAC;AAC/E,wFAAwF;AAExF,MAAM;AACN,8CAA8C;AAC9C,MAAM;AACN,sCAAsC;AACtC;;GAEG;AACH,IAAI,kBAAkB,GAAgB,IAAI,CAAC;AAE3C;;GAEG;AACH,IAAI,oBAAoB,GAAW,IAAI,CAAC;AAExC,MAAM;AACN,qFAAqF;AACrF,4CAA4C;AAC5C,MAAM;AACN,iFAAiF;AACjF,oCAAoC;AACpC,mDAAmD;AACnD,8CAA8C;AAC9C,UAAU;AACV,KAAK;AAEL;;;;GAIG;AACH,MAAM,wBAAwB,GAAG,CAC7B,kCAA4C,EAAE,EAChD,EAAE;IACA,kBAAkB,GAAG,IAAA,wBAAc,EAAC;QAChC,GAAG,2CAA4B,CAAC,MAAM;QACtC,GAAG,2CAA4B,CAAC,OAAO;QACvC,GAAG,+BAA+B;KACrC,CAAC,CAAC;AACP,CAAC,CAAC;AAEF;;;;;;;;;;GAUG;AACI,MAAM,kCAAkC,GAAG,CAC9C,+BAAyC,EACzC,+BAAyC,EACzC,4BAA4B,GAAG,KAAK,EACtC,EAAE;IACA,IACI,+BAA+B;QAC/B,+BAA+B,CAAC,MAAM,GAAG,CAAC,EAC5C;QACE,oBAAoB,GAAG,IAAA,iDAAuB,EAC1C,+BAA+B,EAC/B;YACI,4BAA4B;SAC/B,CACJ,CAAC;KACL;IAED,qDAAqD;IACrD,wBAAwB,CAAC,+BAA+B,CAAC,CAAC;AAC9D,CAAC,CAAC;AAnBW,QAAA,kCAAkC,sCAmB7C;AAEF;;;;;;GAMG;AACI,MAAM,eAAe,GAAG,CAC3B,IAAW,EACX,sBAAiC,EACpB,EAAE;IACf,kCAAkC;IAClC,sCAAsC;IAEtC,6EAA6E;IAC7E,0CAA0C;IAC1C,6CAA6C;IAE7C,sEAAsE;IACtE,sCAAsC;IACtC,gBAAgB;IAChB,kCAAkC;IAClC,6BAA6B;IAC7B,SAAS;IACT,IAAI;IAEJ,iCAAiC;IACjC,8BAA8B;IAC9B,gCAAgC;IAChC,MAAM;IAEN,0DAA0D;IAC1D,YAAY;IACZ,yBAAyB;IACzB,KAAK;IAEL,iDAAiD;IACjD,4EAA4E;IAC5E,IAAI;IAEJ,2DAA2D;IAC3D,iCAAiC;IACjC,sCAAsC;IACtC,QAAQ;IAER,yCAAyC;IACzC,gBAAgB;IAChB,kCAAkC;IAClC,6BAA6B;IAC7B,SAAS;IACT,IAAI;IAEJ,kBAAkB;IAElB,IAAI,CAAC,kBAAkB,EAAE;QACrB,wBAAwB,EAAE,CAAC;KAC9B;IAED,OAAO,IAAA,oCAA0B,EAAC;QAC9B,IAAI;QACJ,kBAAkB,EAAE,kBAAkB;QACtC,aAAa,EAAE,oBAAoB;QACnC,WAAW,EAAE,sBAAsB;QACnC,WAAW,EAAE,kBAAkB;KAClC,CAAC,CAAC;AACP,CAAC,CAAC;AA1DW,QAAA,eAAe,mBA0D1B;AAEF;;;;GAIG;AACI,MAAM,kBAAkB,GAAG,CAAC,GAAW,EAAW,EAAE;IACvD,qBAAqB;IACrB,sEAAsE;IACtE,0BAA0B;IAC1B,SAAS;IACT,6CAA6C;IAC7C,IAAI;IAEJ,eAAe;IA
Cf,2EAA2E;IAE3E,qBAAqB;IACrB,0BAA0B;IAC1B,IAAI;IAEJ,wCAAwC;IAExC,MAAM,UAAU,GAAG,IAAA,uBAAe,EAAC;QAC/B,KAAK,EAAE,GAAG;QACV,OAAO,EAAE,EAAE;KACL,CAAC,CAAC;IAEZ,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC;AACjC,CAAC,CAAC;AAvBW,QAAA,kBAAkB,sBAuB7B;AAEF;;;;GAIG;AACI,MAAM,oBAAoB,GAAG,CAAC,GAAW,EAAW,EAAE;IACzD,IAAI,CAAC,kBAAkB,EAAE;QACrB,wCAAwC;QACxC,8CAA8C;QAC9C,+CAA+C;QAC/C,MAAM;QACN,wBAAwB,EAAE,CAAC;KAC9B;IAED,OAAO,IAAA,sBAAY,EAAC,GAAG,EAAE,kBAAkB,CAAC,CAAC;AACjD,CAAC,CAAC;AAVW,QAAA,oBAAoB,wBAU/B"}
@@ -0,0 +1,36 @@
1
+ import { IItem } from '../../types';
2
+ import { MatchResult } from './helpers';
3
+ /**
4
+ * Sets additional matching patterns for topics.
5
+ * The additional matching patterns and rejected patterns are not included in the TitleSummaryMatchingPatterns
6
+ * but fetched from the media folder from the Living Atlas server.
7
+ *
8
+ * @param {string[]} extraMatchingPatterns - Additional matching patterns to be set.
9
+ * @param {string[]} extraRejectedPatterns - Additional rejected patterns to be set.
10
+ * @param {boolean} [shouldAvoidUsingWordBoundary=false] - A boolean indicating whether to avoid using word boundaries (useful for languages like Chinese and Japanese).
11
+ *
12
+ * @remarks
13
+ * - For languages like Japanese that don't use spaces between words (or where word boundaries are not easily determined by spaces or punctuation),
14
+ * set `shouldAvoidUsingWordBoundary` to true to remove `\b` and use lookarounds to ensure correct boundaries.
15
+ */
16
+ export declare const setExtraMatchingPatternsForTopics: (extraMatchingPatterns: string[], extraRejectedPatterns: string[], shouldAvoidUsingWordBoundary?: boolean) => void;
17
+ /**
18
+ * Matches topic information from an item's title and snippet using predefined or custom matching patterns.
19
+ *
20
+ * @param {IItem} item - The item containing the title and snippet to be matched.
21
+ * @param {string[]} [customMatchingPattenrs] - Optional custom matching patterns to be used in addition to the predefined ones.
22
+ * @returns {MatchResult[]} - An array of matched results for topic, deduplicated and with overlaps removed.
23
+ */
24
+ export declare const matchTopicInfo: (item: IItem, customMatchingPattenrs?: string[]) => MatchResult[];
25
+ /**
26
+ * Determines if a given string is a recognized topic.
27
+ * @param str - The string to check.
28
+ * @returns {boolean} - True if the string is a recognized topic, false otherwise.
29
+ */
30
+ export declare const isRecognizedTopic: (str: string) => boolean;
31
+ /**
32
+ * Determines if a given string is a rejected topic.
33
+ * @param str - The string to check.
34
+ * @returns {boolean} - True if the string is a rejected topic, false otherwise.
35
+ */
36
+ export declare const isRejectedTopicInfo: (str: string) => boolean;
@@ -0,0 +1,175 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.isRejectedTopicInfo = exports.isRecognizedTopic = exports.matchTopicInfo = exports.setExtraMatchingPatternsForTopics = void 0;
7
+ const helpers_1 = require("./helpers");
8
+ const TitleSummaryMatchingPatterns_Topics_json_1 = __importDefault(require("../../data/TitleSummaryMatchingPatterns_Topics.json"));
9
+ const TitleSummaryRejectedPatterns_json_1 = __importDefault(require("../../data/TitleSummaryRejectedPatterns.json"));
10
+ const createWordBoundaryRegex_1 = require("../util/createWordBoundaryRegex");
11
+ /**
12
+ * Regular expression to match topic information in a case-insensitive manner.
13
+ * The pattern in TitleSummaryMatchingPatterns is already escaped.
14
+ */
15
+ const regexForTopics = (0, createWordBoundaryRegex_1.createWordBoundaryRegex)(TitleSummaryMatchingPatterns_Topics_json_1.default, {
16
+ shouldMatchPlural: true,
17
+ });
18
+ // const regexForTopics = new RegExp(`\\b(${matchingPatternsForTopics})\\b`, 'gi');
19
+ // /**
20
+ // * Set of recognized topics.
21
+ // */
22
+ // let topicsSet: Set<string> = null; //new Set<string>(TitleSummaryMatchingPatterns.topics);
23
+ /**
24
+ * Set of rejected topics.
25
+ */
26
+ let rejectedTopicsSet = null; //new Set<string>(TitleSummaryMatchingPatterns.topics)
27
+ /**
28
+ * Regular expression to match additional topic information.
29
+ */
30
+ let regexForExtraTopics = null;
31
+ // /**
32
+ // * Array of additional matching patterns for topics.
33
+ // */
34
+ // let extraMatchingPatternsForTopics: string[] = [];
35
+ // /**
36
+ // * Array of additional rejected patterns for topics.
37
+ // */
38
+ // let extraRejectedPatternsForTopics: string[] = [];
39
+ // /**
40
+ // * Creates a set of recognized topics from predefined and extra matching patterns.
41
+ // *
42
+ // * @param {string[]} extraMatchingPatternsForTopics - Additional matching patterns to be set.
43
+ // */
44
+ // const createTopicsSet = (extraMatchingPatternsForTopics: string[] = []) => {
45
+ // topicsSet = toLowercaseSet([
46
+ // ...TitleSummaryMatchingPatterns.topics,
47
+ // ...extraMatchingPatternsForTopics,
48
+ // ]);
49
+ // };
50
+ /**
51
+ * Creates a set of rejected topics from predefined and extra rejected patterns.
52
+ *
53
+ * @param {string[]} extraRejectedPatternsForTopics - Additional rejected patterns to be set.
54
+ */
55
+ const createRejectedTopicsSet = (extraRejectedPatternsForTopics = []) => {
56
+ rejectedTopicsSet = (0, helpers_1.toLowercaseSet)([
57
+ ...TitleSummaryRejectedPatterns_json_1.default.common,
58
+ ...TitleSummaryRejectedPatterns_json_1.default.topics,
59
+ ...extraRejectedPatternsForTopics,
60
+ ]);
61
+ };
62
+ /**
63
+ * Sets additional matching patterns for topics.
64
+ * The additional matching patterns and rejected patterns are not included in the TitleSummaryMatchingPatterns
65
+ * but fetched from the media folder from the Living Atlas server.
66
+ *
67
+ * @param {string[]} extraMatchingPatterns - Additional matching patterns to be set.
68
+ * @param {string[]} extraRejectedPatterns - Additional rejected patterns to be set.
69
+ * @param {boolean} [shouldAvoidUsingWordBoundary=false] - A boolean indicating whether to avoid using word boundaries (useful for languages like Chinese and Japanese).
70
+ *
71
+ * @remarks
72
+ * - For languages like Japanese that don't use spaces between words (or where word boundaries are not easily determined by spaces or punctuation),
73
+ * set `shouldAvoidUsingWordBoundary` to true to remove `\b` and use lookarounds to ensure correct boundaries.
74
+ */
75
+ const setExtraMatchingPatternsForTopics = (extraMatchingPatterns, extraRejectedPatterns, shouldAvoidUsingWordBoundary = false) => {
76
+ // extraMatchingPatternsForTopics = matchingPatterns || [];
77
+ // extraRejectedPatternsForTopics = rejectedPatterns || [];
78
+ if (extraMatchingPatterns && extraMatchingPatterns.length) {
79
+ regexForExtraTopics = (0, createWordBoundaryRegex_1.createWordBoundaryRegex)(extraMatchingPatterns, {
80
+ shouldAvoidUsingWordBoundary,
81
+ shouldMatchPlural: true,
82
+ });
83
+ }
84
+ // createTopicsSet(extraMatchingPatterns);
85
+ createRejectedTopicsSet(extraRejectedPatterns);
86
+ };
87
+ exports.setExtraMatchingPatternsForTopics = setExtraMatchingPatternsForTopics;
88
+ /**
89
+ * Matches topic information from an item's title and snippet using predefined or custom matching patterns.
90
+ *
91
+ * @param {IItem} item - The item containing the title and snippet to be matched.
92
+ * @param {string[]} [customMatchingPattenrs] - Optional custom matching patterns to be used in addition to the predefined ones.
93
+ * @returns {MatchResult[]} - An array of matched results for topic, deduplicated and with overlaps removed.
94
+ */
95
+ const matchTopicInfo = (item, customMatchingPattenrs) => {
96
+ // const title = item.title || '';
97
+ // const snippet = item.snippet || '';
98
+ // const matchedStrFromTitle: string[] = title.match(regexForTopics) || [];
99
+ // const matchedStrFromSnippet: string[] = snippet.match(regexForTopics) || [];
100
+ // if (!matchedStrFromTitle.length && !matchedStrFromSnippet.length) {
101
+ // return matchWithCustomPatterns(item, customMatchingPattenrs);
102
+ // }
103
+ // return cleanUpMatchedResults([
104
+ // ...matchedStrFromTitle,
105
+ // ...matchedStrFromSnippet,
106
+ // ]);
107
+ // // match topics using patterns from the TitleSummaryMatchingPatterns
108
+ // let matched: string[] = matchPatternsInTitleAndSnippet(
109
+ // item,
110
+ // regexForTopics
111
+ // );
112
+ // // if no matches found, try matching with extra patterns if provided
113
+ // // the extra patterns are not included in the TitleSummaryMatchingPatterns
114
+ // // but fetched from the media folder from the Living Atlas server
115
+ // if (!matched.length && regexForExtraTopics) {
116
+ // matched = matchPatternsInTitleAndSnippet(item, regexForExtraTopics);
117
+ // }
118
+ // // if still no matches found, try matching with custom patterns if provided
119
+ // if (!matched.length && customMatchingPattenrs?.length) {
120
+ // if (!rejectedTopicsSet) {
121
+ // createRejectedTopicsSet();
122
+ // }
123
+ // matched = matchWithCustomPatterns(
124
+ // item,
125
+ // customMatchingPattenrs,
126
+ // rejectedTopicsSet
127
+ // );
128
+ // }
129
+ // return matched;
130
+ if (!rejectedTopicsSet) {
131
+ createRejectedTopicsSet();
132
+ }
133
+ return (0, helpers_1.matchUsingMultiplePatterns)({
134
+ item,
135
+ predefinedPatterns: regexForTopics,
136
+ extraPatterns: regexForExtraTopics,
137
+ customTerms: customMatchingPattenrs,
138
+ rejectedSet: rejectedTopicsSet,
139
+ });
140
+ };
141
+ exports.matchTopicInfo = matchTopicInfo;
142
+ /**
143
+ * Determines if a given string is a recognized topic.
144
+ * @param str - The string to check.
145
+ * @returns {boolean} - True if the string is a recognized topic, false otherwise.
146
+ */
147
+ const isRecognizedTopic = (str) => {
148
+ // if (!topicsSet) {
149
+ // // topicsSet = toLowercaseSet([
150
+ // // ...TitleSummaryMatchingPatterns.topics,
151
+ // // ...extraMatchingPatternsForTopics,
152
+ // // ]);
153
+ // createTopicsSet();
154
+ // }
155
+ // return isFoundInSet(str, topicsSet);
156
+ const topicInfo = (0, exports.matchTopicInfo)({
157
+ title: str,
158
+ snippet: '',
159
+ });
160
+ return topicInfo.length > 0;
161
+ };
162
+ exports.isRecognizedTopic = isRecognizedTopic;
163
+ /**
164
+ * Determines if a given string is a rejected topic.
165
+ * @param str - The string to check.
166
+ * @returns {boolean} - True if the string is a rejected topic, false otherwise.
167
+ */
168
+ const isRejectedTopicInfo = (str) => {
169
+ if (!rejectedTopicsSet) {
170
+ createRejectedTopicsSet();
171
+ }
172
+ return (0, helpers_1.isFoundInSet)(str, rejectedTopicsSet);
173
+ };
174
+ exports.isRejectedTopicInfo = isRejectedTopicInfo;
175
+ //# sourceMappingURL=matchTopicInfo.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"matchTopicInfo.js","sourceRoot":"","sources":["../../../src/lib/checkTitleAndSnippetSearchability/matchTopicInfo.ts"],"names":[],"mappings":";;;;;;AACA,uCAUmB;AACnB,mIAA4F;AAC5F,qHAAwF;AACxF,6EAA0E;AAE1E;;;GAGG;AACH,MAAM,cAAc,GAAG,IAAA,iDAAuB,EAAC,kDAAyB,EAAE;IACtE,iBAAiB,EAAE,IAAI;CAC1B,CAAC,CAAC;AACH,mFAAmF;AAEnF,MAAM;AACN,+BAA+B;AAC/B,MAAM;AACN,6FAA6F;AAE7F;;GAEG;AACH,IAAI,iBAAiB,GAAgB,IAAI,CAAC,CAAC,sDAAsD;AAEjG;;GAEG;AACH,IAAI,mBAAmB,GAAW,IAAI,CAAC;AAEvC,MAAM;AACN,uDAAuD;AACvD,MAAM;AACN,qDAAqD;AACrD,MAAM;AACN,uDAAuD;AACvD,MAAM;AACN,qDAAqD;AAErD,MAAM;AACN,qFAAqF;AACrF,KAAK;AACL,gGAAgG;AAChG,MAAM;AACN,+EAA+E;AAC/E,mCAAmC;AACnC,kDAAkD;AAClD,6CAA6C;AAC7C,UAAU;AACV,KAAK;AAEL;;;;GAIG;AACH,MAAM,uBAAuB,GAAG,CAC5B,iCAA2C,EAAE,EAC/C,EAAE;IACA,iBAAiB,GAAG,IAAA,wBAAc,EAAC;QAC/B,GAAG,2CAA4B,CAAC,MAAM;QACtC,GAAG,2CAA4B,CAAC,MAAM;QACtC,GAAG,8BAA8B;KACpC,CAAC,CAAC;AACP,CAAC,CAAC;AAEF;;;;;;;;;;;;GAYG;AACI,MAAM,iCAAiC,GAAG,CAC7C,qBAA+B,EAC/B,qBAA+B,EAC/B,4BAA4B,GAAG,KAAK,EAChC,EAAE;IACN,2DAA2D;IAC3D,2DAA2D;IAE3D,IAAI,qBAAqB,IAAI,qBAAqB,CAAC,MAAM,EAAE;QACvD,mBAAmB,GAAG,IAAA,iDAAuB,EAAC,qBAAqB,EAAE;YACjE,4BAA4B;YAC5B,iBAAiB,EAAE,IAAI;SAC1B,CAAC,CAAC;KACN;IAED,0CAA0C;IAC1C,uBAAuB,CAAC,qBAAqB,CAAC,CAAC;AACnD,CAAC,CAAC;AAjBW,QAAA,iCAAiC,qCAiB5C;AAEF;;;;;;GAMG;AACI,MAAM,cAAc,GAAG,CAC1B,IAAW,EACX,sBAAiC,EACpB,EAAE;IACf,kCAAkC;IAClC,sCAAsC;IAEtC,2EAA2E;IAC3E,+EAA+E;IAE/E,sEAAsE;IACtE,oEAAoE;IACpE,IAAI;IAEJ,iCAAiC;IACjC,8BAA8B;IAC9B,gCAAgC;IAChC,MAAM;IAEN,uEAAuE;IACvE,0DAA0D;IAC1D,YAAY;IACZ,qBAAqB;IACrB,KAAK;IAEL,uEAAuE;IACvE,6EAA6E;IAC7E,oEAAoE;IACpE,gDAAgD;IAChD,2EAA2E;IAC3E,IAAI;IAEJ,8EAA8E;IAC9E,2DAA2D;IAC3D,gCAAgC;IAChC,qCAAqC;IACrC,QAAQ;IAER,yCAAyC;IACzC,gBAAgB;IAChB,kCAAkC;IAClC,4BAA4B;IAC5B,SAAS;IACT,IAAI;IAEJ,kBAAkB;IAElB,IAAI,CAAC,iBAAiB,EAAE;QACpB,uBAAuB,EAAE,CAAC;KAC7B;IAED,OAAO,IAAA,oCAA0B,EAAC;QAC9B,IAAI;QACJ,kBAAkB,EAAE,cAAc;QAClC,aAAa,EAAE,mBAAmB;QAClC,WAAW,EAAE,sBAAsB;QACnC,WAAW,EAAE,iBAAiB;KACjC,CAAC,CAAC;AACP,CAAC,CAAC;AA1DW,QAAA,cAAc,kBA0DzB;AAE
F;;;;GAIG;AACI,MAAM,iBAAiB,GAAG,CAAC,GAAW,EAAW,EAAE;IACtD,oBAAoB;IACpB,sCAAsC;IACtC,qDAAqD;IACrD,gDAAgD;IAChD,aAAa;IACb,yBAAyB;IACzB,IAAI;IAEJ,uCAAuC;IAEvC,MAAM,SAAS,GAAG,IAAA,sBAAc,EAAC;QAC7B,KAAK,EAAE,GAAG;QACV,OAAO,EAAE,EAAE;KACL,CAAC,CAAC;IAEZ,OAAO,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC;AAChC,CAAC,CAAC;AAjBW,QAAA,iBAAiB,qBAiB5B;AAEF;;;;GAIG;AACI,MAAM,mBAAmB,GAAG,CAAC,GAAW,EAAW,EAAE;IACxD,IAAI,CAAC,iBAAiB,EAAE;QACpB,uBAAuB,EAAE,CAAC;KAC7B;IAED,OAAO,IAAA,sBAAY,EAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;AAChD,CAAC,CAAC;AANW,QAAA,mBAAmB,uBAM9B"}
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scoringConfig.js","sourceRoot":"","sources":["../../../src/lib/checkTitleAndSnippetSearchability/scoringConfig.ts"],"names":[],"mappings":";AAAA,+CAA+C;;;AAElC,QAAA,aAAa,GAAG;IACzB,MAAM,EAAE,EAAE;IACV,aAAa,EAAE;QACX,OAAO,EAAE;YACL,sBAAsB,EAAE,IAAI;YAC5B,kBAAkB,EAAE,IAAI;YACxB,oBAAoB,EAAE,IAAI;YAC1B,mBAAmB,EAAE,IAAI;SAC5B;KACJ;CACJ,CAAC"}