@vannizhang/living-atlas-content-validator 1.5.17 → 1.5.18-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1376 -162
- package/dist/configureSettings.d.ts +133 -0
- package/dist/configureSettings.js +61 -0
- package/dist/configureSettings.js.map +1 -0
- package/dist/data/TitleSummaryMatchingPatterns_Locations.json +829 -0
- package/dist/data/TitleSummaryMatchingPatterns_Sources.json +382 -0
- package/dist/data/TitleSummaryMatchingPatterns_Time.json +25 -0
- package/dist/data/TitleSummaryMatchingPatterns_Topics.json +1365 -0
- package/dist/data/TitleSummaryRejectedPatterns.json +44 -0
- package/dist/index.d.ts +27 -9
- package/dist/index.js +55 -9
- package/dist/index.js.map +1 -1
- package/dist/lib/accessInformation/isValidAccessInformation.js +4 -6
- package/dist/lib/accessInformation/isValidAccessInformation.js.map +1 -1
- package/dist/lib/checkProfanities/checkProfanities.d.ts +7 -0
- package/dist/lib/checkProfanities/checkProfanities.js +26 -0
- package/dist/lib/checkProfanities/checkProfanities.js.map +1 -0
- package/dist/lib/checkTitleAndSnippetSearchability/checkTitleAndSnippetSearchability.d.ts +34 -0
- package/dist/lib/{recommendedText/checkRecommendedText.js → checkTitleAndSnippetSearchability/checkTitleAndSnippetSearchability.js} +51 -25
- package/dist/lib/checkTitleAndSnippetSearchability/checkTitleAndSnippetSearchability.js.map +1 -0
- package/dist/lib/checkTitleAndSnippetSearchability/config.d.ts +97 -0
- package/dist/lib/checkTitleAndSnippetSearchability/config.js +41 -0
- package/dist/lib/checkTitleAndSnippetSearchability/config.js.map +1 -0
- package/dist/lib/checkTitleAndSnippetSearchability/data.d.ts +0 -0
- package/dist/lib/checkTitleAndSnippetSearchability/data.js +693 -0
- package/dist/lib/checkTitleAndSnippetSearchability/data.js.map +1 -0
- package/dist/lib/checkTitleAndSnippetSearchability/helpers.d.ts +117 -0
- package/dist/lib/checkTitleAndSnippetSearchability/helpers.js +241 -0
- package/dist/lib/checkTitleAndSnippetSearchability/helpers.js.map +1 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchDateTimeInfo.d.ts +34 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchDateTimeInfo.js +207 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchDateTimeInfo.js.map +1 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchLocationInfo.d.ts +34 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchLocationInfo.js +170 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchLocationInfo.js.map +1 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchSourceInfo.d.ts +34 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchSourceInfo.js +172 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchSourceInfo.js.map +1 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchTopicInfo.d.ts +36 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchTopicInfo.js +175 -0
- package/dist/lib/checkTitleAndSnippetSearchability/matchTopicInfo.js.map +1 -0
- package/dist/lib/checkTitleAndSnippetSearchability/scoringConfig.js.map +1 -0
- package/dist/lib/config.d.ts +4 -1
- package/dist/lib/config.js +5 -5
- package/dist/lib/config.js.map +1 -1
- package/dist/lib/deleteProtection/isDeleteProtectionEnabled.js +6 -2
- package/dist/lib/deleteProtection/isDeleteProtectionEnabled.js.map +1 -1
- package/dist/lib/deprecated/isDeprecated.js +5 -2
- package/dist/lib/deprecated/isDeprecated.js.map +1 -1
- package/dist/lib/description/isValidDescription.js +8 -5
- package/dist/lib/description/isValidDescription.js.map +1 -1
- package/dist/lib/licenseInfo/isValidLicenseInfo.js +5 -2
- package/dist/lib/licenseInfo/isValidLicenseInfo.js.map +1 -1
- package/dist/lib/sharing/isValidAccess.js +5 -2
- package/dist/lib/sharing/isValidAccess.js.map +1 -1
- package/dist/lib/ssl/isValidSSL.js +15 -6
- package/dist/lib/ssl/isValidSSL.js.map +1 -1
- package/dist/lib/summary/isValidSummary.js +6 -3
- package/dist/lib/summary/isValidSummary.js.map +1 -1
- package/dist/lib/tags/isValidTags.js +7 -3
- package/dist/lib/tags/isValidTags.js.map +1 -1
- package/dist/lib/thumbnail/isValidThumbnail.d.ts +6 -1
- package/dist/lib/thumbnail/isValidThumbnail.js +10 -7
- package/dist/lib/thumbnail/isValidThumbnail.js.map +1 -1
- package/dist/lib/title/isValidTitle.js +9 -3
- package/dist/lib/title/isValidTitle.js.map +1 -1
- package/dist/lib/userProfileDescription/isValidUserProfileDescription.js +44 -38
- package/dist/lib/userProfileDescription/isValidUserProfileDescription.js.map +1 -1
- package/dist/lib/userProfileName/isValidUserProfileName.js +8 -5
- package/dist/lib/userProfileName/isValidUserProfileName.js.map +1 -1
- package/dist/lib/userProfileThumbnail/isValidUserProfileThumbnail.js +6 -3
- package/dist/lib/userProfileThumbnail/isValidUserProfileThumbnail.js.map +1 -1
- package/dist/lib/util/containsNonEnglishCharacters.d.ts +13 -0
- package/dist/lib/util/containsNonEnglishCharacters.js +30 -0
- package/dist/lib/util/containsNonEnglishCharacters.js.map +1 -0
- package/dist/lib/util/countSentences.d.ts +23 -0
- package/dist/lib/util/countSentences.js +54 -0
- package/dist/lib/util/countSentences.js.map +1 -0
- package/dist/lib/util/createWordBoundaryRegex.d.ts +46 -0
- package/dist/lib/util/createWordBoundaryRegex.js +77 -0
- package/dist/lib/util/createWordBoundaryRegex.js.map +1 -0
- package/dist/lib/util/escapeSpecialCharacters.d.ts +7 -0
- package/dist/lib/util/escapeSpecialCharacters.js +22 -0
- package/dist/lib/util/escapeSpecialCharacters.js.map +1 -0
- package/dist/lib/util/getLivingAtlasSupportedItemTypes.d.ts +21 -0
- package/dist/lib/util/getLivingAtlasSupportedItemTypes.js +34 -0
- package/dist/lib/util/getLivingAtlasSupportedItemTypes.js.map +1 -0
- package/dist/lib/util/getNumberOfWords.js +6 -2
- package/dist/lib/util/getNumberOfWords.js.map +1 -1
- package/dist/lib/util/getScoringRules.js +6 -0
- package/dist/lib/util/getScoringRules.js.map +1 -1
- package/dist/lib/util/isLayer.js +3 -2
- package/dist/lib/util/isLayer.js.map +1 -1
- package/dist/lib/util/isUrl.d.ts +6 -0
- package/dist/lib/util/isUrl.js +16 -1
- package/dist/lib/util/isUrl.js.map +1 -1
- package/dist/lib/util/isValidRegexPattern.d.ts +7 -0
- package/dist/lib/util/isValidRegexPattern.js +20 -0
- package/dist/lib/util/isValidRegexPattern.js.map +1 -0
- package/dist/lib/util/sanitizeTags.d.ts +6 -0
- package/dist/lib/util/sanitizeTags.js +16 -0
- package/dist/lib/util/sanitizeTags.js.map +1 -0
- package/dist/lib/util/shouldValidateByBetaRules.js +6 -1
- package/dist/lib/util/shouldValidateByBetaRules.js.map +1 -1
- package/dist/lib/util/stringsConfig.d.ts +9 -0
- package/dist/lib/util/stringsConfig.js +9 -1
- package/dist/lib/util/stringsConfig.js.map +1 -1
- package/dist/lib/validate/validate.d.ts +57 -12
- package/dist/lib/validate/validate.js +87 -213
- package/dist/lib/validate/validate.js.map +1 -1
- package/dist/lib/validate/validateHelpers.d.ts +63 -0
- package/dist/lib/validate/validateHelpers.js +157 -0
- package/dist/lib/validate/validateHelpers.js.map +1 -0
- package/dist/locale/de.json +1 -1
- package/dist/locale/en.json +23 -23
- package/dist/locale/es.json +1 -1
- package/dist/locale/fr.json +1 -1
- package/dist/locale/ja.json +1 -1
- package/dist/locale/pt-br.json +1 -1
- package/dist/package-info.json +1 -1
- package/dist/services/content-validator-assets/config.d.ts +4 -0
- package/dist/services/content-validator-assets/config.js +8 -0
- package/dist/services/content-validator-assets/config.js.map +1 -0
- package/dist/services/content-validator-assets/fetchAdditonalPatterns4TitleAndSnippetSearchability.d.ts +95 -0
- package/dist/services/content-validator-assets/fetchAdditonalPatterns4TitleAndSnippetSearchability.js +92 -0
- package/dist/services/content-validator-assets/fetchAdditonalPatterns4TitleAndSnippetSearchability.js.map +1 -0
- package/dist/services/content-validator-assets/fetchProfanitiesData.d.ts +18 -0
- package/dist/services/content-validator-assets/fetchProfanitiesData.js +55 -0
- package/dist/services/content-validator-assets/fetchProfanitiesData.js.map +1 -0
- package/dist/services/content-validator-assets/helpers.d.ts +16 -0
- package/dist/services/content-validator-assets/helpers.js +35 -0
- package/dist/services/content-validator-assets/helpers.js.map +1 -0
- package/dist/services/custom-terms/config.d.ts +27 -0
- package/dist/services/custom-terms/config.js +79 -0
- package/dist/services/custom-terms/config.js.map +1 -0
- package/dist/services/custom-terms/customTerms.d.ts +229 -0
- package/dist/services/custom-terms/customTerms.js +394 -0
- package/dist/services/custom-terms/customTerms.js.map +1 -0
- package/dist/services/custom-terms/helpers.d.ts +8 -0
- package/dist/services/custom-terms/helpers.js +25 -0
- package/dist/services/custom-terms/helpers.js.map +1 -0
- package/dist/services/custom-terms/index.d.ts +3 -0
- package/dist/services/custom-terms/index.js +10 -0
- package/dist/services/custom-terms/index.js.map +1 -0
- package/dist/services/custom-terms-review-results/config.d.ts +31 -0
- package/dist/services/custom-terms-review-results/config.js +78 -0
- package/dist/services/custom-terms-review-results/config.js.map +1 -0
- package/dist/services/custom-terms-review-results/customTermsReviewResults.d.ts +133 -0
- package/dist/services/custom-terms-review-results/customTermsReviewResults.js +276 -0
- package/dist/services/custom-terms-review-results/customTermsReviewResults.js.map +1 -0
- package/dist/services/custom-terms-review-results/helpers.d.ts +24 -0
- package/dist/services/custom-terms-review-results/helpers.js +52 -0
- package/dist/services/custom-terms-review-results/helpers.js.map +1 -0
- package/dist/services/custom-terms-review-results/index.d.ts +4 -0
- package/dist/services/custom-terms-review-results/index.js +13 -0
- package/dist/services/custom-terms-review-results/index.js.map +1 -0
- package/dist/services/shared/addFeatures.d.ts +28 -0
- package/dist/services/shared/addFeatures.js +52 -0
- package/dist/services/shared/addFeatures.js.map +1 -0
- package/dist/services/shared/applyEdits.d.ts +28 -0
- package/dist/services/shared/applyEdits.js +53 -0
- package/dist/services/shared/applyEdits.js.map +1 -0
- package/dist/services/shared/config.d.ts +44 -0
- package/dist/services/shared/config.js +35 -0
- package/dist/services/shared/config.js.map +1 -0
- package/dist/services/shared/getItemInfo.d.ts +36 -0
- package/dist/services/shared/getItemInfo.js +56 -0
- package/dist/services/shared/getItemInfo.js.map +1 -0
- package/dist/types/index.d.ts +25 -22
- package/package.json +4 -3
- package/dist/__tests__/test-data/title-summary-data.json +0 -14654
- package/dist/data/TitleSummaryMatchingPatterns.json +0 -1902
- package/dist/lib/layers/isValidLayerCount.d.ts +0 -12
- package/dist/lib/layers/isValidLayerCount.js +0 -171
- package/dist/lib/layers/isValidLayerCount.js.map +0 -1
- package/dist/lib/layers/scoringConfig.d.ts +0 -10
- package/dist/lib/layers/scoringConfig.js +0 -20
- package/dist/lib/layers/scoringConfig.js.map +0 -1
- package/dist/lib/recommendedText/checkRecommendedText.d.ts +0 -15
- package/dist/lib/recommendedText/checkRecommendedText.js.map +0 -1
- package/dist/lib/recommendedText/helpers.d.ts +0 -15
- package/dist/lib/recommendedText/helpers.js +0 -62
- package/dist/lib/recommendedText/helpers.js.map +0 -1
- package/dist/lib/recommendedText/matchDateTimeInfo.d.ts +0 -9
- package/dist/lib/recommendedText/matchDateTimeInfo.js +0 -81
- package/dist/lib/recommendedText/matchDateTimeInfo.js.map +0 -1
- package/dist/lib/recommendedText/matchLocationInfo.d.ts +0 -9
- package/dist/lib/recommendedText/matchLocationInfo.js +0 -745
- package/dist/lib/recommendedText/matchLocationInfo.js.map +0 -1
- package/dist/lib/recommendedText/matchSourceInfo.d.ts +0 -9
- package/dist/lib/recommendedText/matchSourceInfo.js +0 -32
- package/dist/lib/recommendedText/matchSourceInfo.js.map +0 -1
- package/dist/lib/recommendedText/matchTopicInfo.d.ts +0 -9
- package/dist/lib/recommendedText/matchTopicInfo.js +0 -32
- package/dist/lib/recommendedText/matchTopicInfo.js.map +0 -1
- package/dist/lib/recommendedText/scoringConfig.js.map +0 -1
- /package/dist/lib/{recommendedText → checkTitleAndSnippetSearchability}/scoringConfig.d.ts +0 -0
- /package/dist/lib/{recommendedText → checkTitleAndSnippetSearchability}/scoringConfig.js +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"data.js","sourceRoot":"","sources":["../../../src/lib/checkTitleAndSnippetSearchability/data.ts"],"names":[],"mappings":"AAAA,oCAAoC;AACpC,qBAAqB;AACrB,iBAAiB;AACjB,iBAAiB;AACjB,wBAAwB;AACxB,iBAAiB;AACjB,gBAAgB;AAChB,kBAAkB;AAClB,oBAAoB;AACpB,6BAA6B;AAC7B,mBAAmB;AACnB,iBAAiB;AACjB,eAAe;AACf,mBAAmB;AACnB,iBAAiB;AACjB,oBAAoB;AACpB,gBAAgB;AAChB,iBAAiB;AACjB,iBAAiB;AACjB,oBAAoB;AACpB,kBAAkB;AAClB,iBAAiB;AACjB,iBAAiB;AACjB,gBAAgB;AAChB,eAAe;AACf,iBAAiB;AACjB,gBAAgB;AAChB,iBAAiB;AACjB,iBAAiB;AACjB,gCAAgC;AAChC,kBAAkB;AAClB,uBAAuB;AACvB,gBAAgB;AAChB,wCAAwC;AACxC,gCAAgC;AAChC,2BAA2B;AAC3B,kBAAkB;AAClB,sBAAsB;AACtB,iBAAiB;AACjB,oBAAoB;AACpB,kBAAkB;AAClB,kBAAkB;AAClB,gBAAgB;AAChB,kBAAkB;AAClB,wBAAwB;AACxB,kCAAkC;AAClC,cAAc;AACd,eAAe;AACf,eAAe;AACf,0BAA0B;AAC1B,uBAAuB;AACvB,kBAAkB;AAClB,iBAAiB;AACjB,eAAe;AACf,mBAAmB;AACnB,sBAAsB;AACtB,oBAAoB;AACpB,uBAAuB;AACvB,iBAAiB;AACjB,cAAc;AACd,iBAAiB;AACjB,gBAAgB;AAChB,wBAAwB;AACxB,iBAAiB;AACjB,kBAAkB;AAClB,kBAAkB;AAClB,4BAA4B;AAC5B,iBAAiB;AACjB,eAAe;AACf,qBAAqB;AACrB,2BAA2B;AAC3B,iBAAiB;AACjB,iBAAiB;AACjB,kBAAkB;AAClB,kBAAkB;AAClB,0BAA0B;AAC1B,uBAAuB;AACvB,cAAc;AACd,iBAAiB;AACjB,gBAAgB;AAChB,uBAAuB;AACvB,0BAA0B;AAC1B,qCAAqC;AACrC,eAAe;AACf,gBAAgB;AAChB,iBAAiB;AACjB,iBAAiB;AACjB,eAAe;AACf,mBAAmB;AACnB,0BAA0B;AAC1B,gBAAgB;AAChB,mBAAmB;AACnB,iBAAiB;AACjB,oBAAoB;AACpB,cAAc;AACd,mBAAmB;AACnB,kBAAkB;AAClB,gBAAgB;AAChB,uBAAuB;AACvB,gBAAgB;AAChB,eAAe;AACf,2CAA2C;AAC3C,kBAAkB;AAClB,iBAAiB;AACjB,iBAAiB;AACjB,eAAe;AACf,mBAAmB;AACnB,cAAc;AACd,cAAc;AACd,iBAAiB;AACjB,qBAAqB;AACrB,gBAAgB;AAChB,eAAe;AACf,iBAAiB;AACjB,eAAe;AACf,gBAAgB;AAChB,gBAAgB;AAChB,6BAA6B;AAC7B,oBAAoB;AACpB,eAAe;AACf,kBAAkB;AAClB,gBAAgB;AAChB,oBAAoB;AACpB,cAAc;AACd,gBAAgB;AAChB,iBAAiB;AACjB,iBAAiB;AACjB,iBAAiB;AACjB,eAAe;AACf,uBAAuB;AACvB,mBAAmB;AACnB,oBAAoB;AACpB,oBAAoB;AACpB,iBAAiB;AACjB,gBAAgB;AAChB,kBAAkB;AAClB,kBAAkB;AAClB,cAAc;AACd,eAAe;AACf,0BAA0B;AAC1B,oBAAoB;AACpB,oBAAoB;AACpB,mBAAmB;AACnB,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB;AACpB,iBAAiB;AACjB,gBAAgB;AACh
B,kBAAkB;AAClB,oBAAoB;AACpB,oBAAoB;AACpB,iBAAiB;AACjB,oBAAoB;AACpB,iBAAiB;AACjB,iBAAiB;AACjB,eAAe;AACf,eAAe;AACf,qBAAqB;AACrB,uBAAuB;AACvB,qBAAqB;AACrB,mBAAmB;AACnB,eAAe;AACf,iBAAiB;AACjB,cAAc;AACd,wBAAwB;AACxB,qBAAqB;AACrB,yBAAyB;AACzB,kCAAkC;AAClC,gBAAgB;AAChB,cAAc;AACd,kBAAkB;AAClB,eAAe;AACf,+BAA+B;AAC/B,gBAAgB;AAChB,0BAA0B;AAC1B,kBAAkB;AAClB,cAAc;AACd,qBAAqB;AACrB,kBAAkB;AAClB,gBAAgB;AAChB,kBAAkB;AAClB,qBAAqB;AACrB,eAAe;AACf,iBAAiB;AACjB,iBAAiB;AACjB,4BAA4B;AAC5B,gBAAgB;AAChB,cAAc;AACd,0BAA0B;AAC1B,yBAAyB;AACzB,sBAAsB;AACtB,+BAA+B;AAC/B,qBAAqB;AACrB,sBAAsB;AACtB,mCAAmC;AACnC,0CAA0C;AAC1C,eAAe;AACf,oBAAoB;AACpB,+BAA+B;AAC/B,sBAAsB;AACtB,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB;AACpB,sBAAsB;AACtB,mBAAmB;AACnB,sBAAsB;AACtB,kBAAkB;AAClB,kBAAkB;AAClB,yBAAyB;AACzB,iBAAiB;AACjB,sBAAsB;AACtB,kDAAkD;AAClD,qBAAqB;AACrB,qBAAqB;AACrB,eAAe;AACf,mBAAmB;AACnB,eAAe;AACf,kBAAkB;AAClB,kBAAkB;AAClB,gBAAgB;AAChB,qBAAqB;AACrB,eAAe;AACf,oBAAoB;AACpB,kBAAkB;AAClB,kBAAkB;AAClB,qBAAqB;AACrB,cAAc;AACd,iBAAiB;AACjB,eAAe;AACf,6BAA6B;AAC7B,iBAAiB;AACjB,iBAAiB;AACjB,sBAAsB;AACtB,kCAAkC;AAClC,gBAAgB;AAChB,gBAAgB;AAChB,iBAAiB;AACjB,8BAA8B;AAC9B,wBAAwB;AACxB,uBAAuB;AACvB,iBAAiB;AACjB,2BAA2B;AAC3B,oBAAoB;AACpB,iBAAiB;AACjB,sBAAsB;AACtB,mBAAmB;AACnB,iBAAiB;AACjB,2BAA2B;AAC3B,eAAe;AACf,gBAAgB;AAChB,kBAAkB;AAElB,iBAAiB;AACjB,gBAAgB;AAChB,iBAAiB;AACjB,kBAAkB;AAClB,oBAAoB;AACpB,kBAAkB;AAClB,qBAAqB;AACrB,kBAAkB;AAClB,8BAA8B;AAC9B,iBAAiB;AACjB,iBAAiB;AACjB,gBAAgB;AAChB,eAAe;AACf,kBAAkB;AAClB,iBAAiB;AACjB,cAAc;AACd,gBAAgB;AAChB,kBAAkB;AAClB,mBAAmB;AACnB,eAAe;AACf,kBAAkB;AAClB,uBAAuB;AACvB,kBAAkB;AAClB,mBAAmB;AACnB,qBAAqB;AACrB,kBAAkB;AAClB,iBAAiB;AACjB,kBAAkB;AAClB,gBAAgB;AAChB,uBAAuB;AACvB,oBAAoB;AACpB,oBAAoB;AACpB,kBAAkB;AAClB,wBAAwB;AACxB,sBAAsB;AACtB,cAAc;AACd,kBAAkB;AAClB,gBAAgB;AAChB,sBAAsB;AACtB,sBAAsB;AACtB,wBAAwB;AACxB,sBAAsB;AACtB,mBAAmB;AACnB,eAAe;AACf,cAAc;AACd,iBAAiB;AACjB,kBAAkB;AAClB,oBAAoB;AACpB,uBAAuB;AACvB,mBAAmB;AACnB,iBAAiB;AACjB,0BAA0B;AAC1B,wBAAwB;AACxB,uBAAuB;AACvB,cAAc;AAEd,iBAAiB;AACjB,
eAAe;AACf,kBAAkB;AAClB,qBAAqB;AACrB,qBAAqB;AACrB,sBAAsB;AACtB,cAAc;AACd,eAAe;AACf,gBAAgB;AAChB,mBAAmB;AACnB,uBAAuB;AACvB,mBAAmB;AACnB,iBAAiB;AACjB,iBAAiB;AACjB,wBAAwB;AACxB,yBAAyB;AACzB,oBAAoB;AACpB,gBAAgB;AAChB,oBAAoB;AACpB,eAAe;AACf,kBAAkB;AAClB,mBAAmB;AACnB,uBAAuB;AACvB,iBAAiB;AACjB,kBAAkB;AAClB,mBAAmB;AACnB,eAAe;AACf,eAAe;AACf,kBAAkB;AAClB,mBAAmB;AACnB,gBAAgB;AAChB,iBAAiB;AACjB,mBAAmB;AACnB,mBAAmB;AACnB,sBAAsB;AACtB,mBAAmB;AACnB,uBAAuB;AACvB,iBAAiB;AACjB,oBAAoB;AACpB,gBAAgB;AAChB,iBAAiB;AACjB,kBAAkB;AAClB,qBAAqB;AACrB,qBAAqB;AACrB,kBAAkB;AAClB,kBAAkB;AAClB,iBAAiB;AACjB,0BAA0B;AAC1B,kBAAkB;AAClB,oBAAoB;AACpB,oBAAoB;AACpB,uBAAuB;AACvB,oBAAoB;AACpB,gBAAgB;AAChB,oBAAoB;AACpB,uBAAuB;AACvB,kBAAkB;AAClB,iBAAiB;AACjB,gBAAgB;AAChB,sBAAsB;AACtB,qBAAqB;AACrB,oBAAoB;AACpB,iBAAiB;AACjB,sBAAsB;AACtB,gBAAgB;AAChB,qBAAqB;AACrB,qBAAqB;AACrB,mBAAmB;AACnB,gBAAgB;AAChB,uBAAuB;AACvB,iBAAiB;AACjB,kBAAkB;AAClB,eAAe;AACf,0BAA0B;AAC1B,eAAe;AACf,kBAAkB;AAClB,iBAAiB;AACjB,oBAAoB;AACpB,gBAAgB;AAChB,oBAAoB;AACpB,qBAAqB;AACrB,kBAAkB;AAClB,qBAAqB;AACrB,kBAAkB;AAClB,oBAAoB;AACpB,oBAAoB;AACpB,yBAAyB;AACzB,kBAAkB;AAClB,mBAAmB;AACnB,0BAA0B;AAC1B,oBAAoB;AACpB,oBAAoB;AACpB,kBAAkB;AAClB,mBAAmB;AACnB,gBAAgB;AAChB,yBAAyB;AACzB,kBAAkB;AAClB,mBAAmB;AACnB,mBAAmB;AACnB,sBAAsB;AACtB,mBAAmB;AACnB,kBAAkB;AAClB,gBAAgB;AAChB,iBAAiB;AACjB,iBAAiB;AACjB,kBAAkB;AAClB,mBAAmB;AACnB,eAAe;AACf,iBAAiB;AACjB,kBAAkB;AAClB,qBAAqB;AACrB,gBAAgB;AAChB,uBAAuB;AACvB,gBAAgB;AAChB,qBAAqB;AACrB,gBAAgB;AAChB,iBAAiB;AACjB,oBAAoB;AACpB,mBAAmB;AACnB,0BAA0B;AAC1B,uBAAuB;AACvB,iBAAiB;AACjB,qBAAqB;AACrB,kBAAkB;AAClB,cAAc;AACd,kBAAkB;AAClB,iBAAiB;AACjB,mBAAmB;AACnB,gBAAgB;AAChB,sBAAsB;AACtB,cAAc;AACd,kBAAkB;AAClB,gBAAgB;AAChB,gBAAgB;AAChB,gBAAgB;AAChB,0BAA0B;AAC1B,qBAAqB;AACrB,gBAAgB;AAChB,iBAAiB;AACjB,kBAAkB;AAClB,iBAAiB;AACjB,kBAAkB;AAClB,kBAAkB;AAClB,mBAAmB;AACnB,kBAAkB;AAClB,mBAAmB;AACnB,kBAAkB;AAClB,kBAAkB;AAClB,oBAAoB;AACpB,kBAAkB;AAClB,oBAAoB;AACpB,kBAAkB;AAClB,oBAAoB;AACpB,gBAAgB;AAChB,mBAAmB;AACnB,gBAAgB;AAChB,kBAAkB;AAClB,kBAAk
B;AAClB,sBAAsB;AACtB,mBAAmB;AACnB,iBAAiB;AACjB,iBAAiB;AACjB,mBAAmB;AACnB,qBAAqB;AACrB,kBAAkB;AAClB,qBAAqB;AACrB,gBAAgB;AAChB,2BAA2B;AAC3B,oBAAoB;AACpB,kBAAkB;AAClB,iBAAiB;AACjB,iBAAiB;AACjB,qBAAqB;AACrB,qBAAqB;AACrB,kBAAkB;AAClB,sBAAsB;AACtB,wBAAwB;AACxB,sBAAsB;AACtB,mBAAmB;AACnB,mBAAmB;AACnB,0BAA0B;AAC1B,cAAc;AACd,iBAAiB;AACjB,eAAe;AACf,mBAAmB;AACnB,gBAAgB;AAChB,oBAAoB;AACpB,gBAAgB;AAChB,eAAe;AACf,qBAAqB;AACrB,yBAAyB;AACzB,sBAAsB;AACtB,qBAAqB;AACrB,oBAAoB;AACpB,eAAe;AACf,mBAAmB;AACnB,mBAAmB;AACnB,oBAAoB;AACpB,gBAAgB;AAChB,kBAAkB;AAClB,qBAAqB;AACrB,oBAAoB;AACpB,kBAAkB;AAClB,uBAAuB;AACvB,qBAAqB;AACrB,kBAAkB;AAClB,uBAAuB;AACvB,0BAA0B;AAC1B,sBAAsB;AACtB,wBAAwB;AACxB,kBAAkB;AAClB,gBAAgB;AAChB,oBAAoB;AACpB,oBAAoB;AACpB,gBAAgB;AAChB,iBAAiB;AACjB,qBAAqB;AACrB,wBAAwB;AACxB,uBAAuB;AACvB,mBAAmB;AACnB,kBAAkB;AAClB,iBAAiB;AACjB,sBAAsB;AACtB,uBAAuB;AACvB,mBAAmB;AACnB,iBAAiB;AACjB,oBAAoB;AACpB,oBAAoB;AACpB,uBAAuB;AACvB,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB;AACpB,wBAAwB;AACxB,iBAAiB;AACjB,kBAAkB;AAClB,qBAAqB;AACrB,uBAAuB;AACvB,iBAAiB;AACjB,iBAAiB;AACjB,oBAAoB;AACpB,iBAAiB;AACjB,mBAAmB;AACnB,oBAAoB;AACpB,iBAAiB;AACjB,oBAAoB;AACpB,kBAAkB;AAClB,oBAAoB;AACpB,uBAAuB;AACvB,gBAAgB;AAChB,gBAAgB;AAChB,iBAAiB;AACjB,wBAAwB;AACxB,yBAAyB;AACzB,oBAAoB;AACpB,cAAc;AACd,iBAAiB;AACjB,iBAAiB;AACjB,gBAAgB;AAChB,qBAAqB;AACrB,kBAAkB;AAClB,iBAAiB;AACjB,gBAAgB;AAChB,eAAe;AACf,iBAAiB;AACjB,mBAAmB;AACnB,qBAAqB;AACrB,oBAAoB;AACpB,mBAAmB;AACnB,oBAAoB;AACpB,iBAAiB;AACjB,gBAAgB;AAChB,oBAAoB;AACpB,mBAAmB;AACnB,kBAAkB;AAClB,gBAAgB;AAChB,mBAAmB;AACnB,mBAAmB;AACnB,wBAAwB;AACxB,kBAAkB;AAClB,2BAA2B;AAC3B,iBAAiB;AACjB,wBAAwB;AACxB,mBAAmB;AACnB,qBAAqB;AACrB,eAAe;AACf,gBAAgB;AAChB,oBAAoB;AACpB,mBAAmB;AACnB,iBAAiB;AACjB,qBAAqB;AACrB,eAAe;AACf,qBAAqB;AACrB,iBAAiB;AACjB,eAAe;AACf,wBAAwB;AACxB,oBAAoB;AACpB,iBAAiB;AACjB,0BAA0B;AAC1B,eAAe;AACf,iBAAiB;AACjB,cAAc;AACd,qBAAqB;AACrB,oBAAoB;AACpB,gBAAgB;AAChB,gBAAgB;AAChB,qBAAqB;AACrB,mBAAmB;AACnB,mBAAmB;AACnB,iBAAiB;AACjB,iBAAiB;AACjB,mBAAmB;AACnB,kBAAkB;AAClB,gBAAgB;AAChB,iBAAiB;AAC
jB,uBAAuB;AACvB,mBAAmB;AACnB,oBAAoB;AACpB,gBAAgB;AAChB,iBAAiB;AACjB,uBAAuB;AACvB,mBAAmB;AACnB,sBAAsB;AACtB,kBAAkB;AAClB,oBAAoB;AACpB,sBAAsB;AACtB,gBAAgB;AAChB,kBAAkB;AAClB,gBAAgB;AAChB,mBAAmB;AACnB,qBAAqB;AACrB,sBAAsB;AACtB,iBAAiB;AACjB,iBAAiB;AACjB,iBAAiB;AACjB,qBAAqB;AACrB,kBAAkB;AAElB,oBAAoB;AACpB,mBAAmB;AACnB,yBAAyB;AACzB,sBAAsB;AACtB,wBAAwB;AACxB,sBAAsB;AACtB,wBAAwB;AACxB,yBAAyB;AACzB,mBAAmB;AACnB,oBAAoB;AACpB,uBAAuB;AACvB,yBAAyB;AACzB,0BAA0B;AAC1B,yBAAyB;AACzB,mBAAmB;AACnB,uBAAuB;AACvB,2BAA2B;AAC3B,yBAAyB;AACzB,uBAAuB;AACvB,yBAAyB;AACzB,wBAAwB;AACxB,sBAAsB;AACtB,wBAAwB;AAExB,uBAAuB;AACvB,kCAAkC;AAClC,aAAa;AACb,6BAA6B;AAC7B,eAAe;AACf,gBAAgB;AAChB,uBAAuB;AACvB,uBAAuB;AACvB,gBAAgB;AAChB,gBAAgB;AAChB,cAAc;AACd,iBAAiB;AACjB,mBAAmB;AACnB,gBAAgB;AAChB,kBAAkB;AAClB,iBAAiB;AACjB,uBAAuB;AACvB,sBAAsB;AACtB,oBAAoB;AACpB,uBAAuB;AACvB,mBAAmB;AACnB,wBAAwB;AACxB,qBAAqB;AACrB,mBAAmB;AACnB,gBAAgB;AAChB,oBAAoB;AACpB,KAAK"}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import { IItem } from '../../types';
|
|
2
|
+
type MatchUsingMultiplePatternsParams = {
|
|
3
|
+
/**
|
|
4
|
+
* The item to be checked for searchability.
|
|
5
|
+
*/
|
|
6
|
+
item: IItem;
|
|
7
|
+
/**
|
|
8
|
+
* The predefined patterns to match against the item's title and snippet.
|
|
9
|
+
*/
|
|
10
|
+
predefinedPatterns: RegExp;
|
|
11
|
+
/**
|
|
12
|
+
* Additional patterns to match against the item's title and snippet if no match is found with predefined patterns.
|
|
13
|
+
* These extra matching patterns are the patterns submitted by the user that have been approved by the Living Atlas team.
|
|
14
|
+
*/
|
|
15
|
+
extraPatterns: RegExp;
|
|
16
|
+
/**
|
|
17
|
+
* Custom terms to match against the item's title and snippet if no match is found with predefined or extra patterns.
|
|
18
|
+
* These custom terms are the terms users submitted using the Nominatim App
|
|
19
|
+
*/
|
|
20
|
+
customTerms: string[];
|
|
21
|
+
/**
|
|
22
|
+
* A set of terms that have been rejected by the Living Atlas team.
|
|
23
|
+
* any custom term that is found in this set will be ignored.
|
|
24
|
+
*/
|
|
25
|
+
rejectedSet: Set<string>;
|
|
26
|
+
};
|
|
27
|
+
/**
|
|
28
|
+
* Represents the source of a match pattern.
|
|
29
|
+
*
|
|
30
|
+
*/
|
|
31
|
+
export type MatchPatternSource = 'predefined' | 'additional' | 'custom';
|
|
32
|
+
/**
|
|
33
|
+
* Represents the result of a pattern match operation.
|
|
34
|
+
*/
|
|
35
|
+
export type MatchResult = {
|
|
36
|
+
/**
|
|
37
|
+
* The string that was matched by the pattern.
|
|
38
|
+
*/
|
|
39
|
+
matchedString: string;
|
|
40
|
+
/**
|
|
41
|
+
* The source of the pattern that was used for matching: `'predefined' | 'additional' | 'custom'`
|
|
42
|
+
*/
|
|
43
|
+
patternSource: MatchPatternSource;
|
|
44
|
+
};
|
|
45
|
+
/**
|
|
46
|
+
* Checks if a string is found in a set of strings.
|
|
47
|
+
* @param str string to be checked
|
|
48
|
+
* @param set set of strings to be checked against
|
|
49
|
+
* @returns true if the string is found in the set, false otherwise
|
|
50
|
+
*/
|
|
51
|
+
export declare const isFoundInSet: (str: string, set: Set<string>) => boolean;
|
|
52
|
+
/**
|
|
53
|
+
* Matches patterns in an item's title and snippet using a hierarchical approach:
|
|
54
|
+
* predefined patterns first, followed by extra patterns, and finally custom terms.
|
|
55
|
+
*
|
|
56
|
+
* @param {MatchUsingMultiplePatternsParams} params - The parameters for pattern matching.
|
|
57
|
+
* @param {IItem} params.item - The item containing the title and snippet to search for patterns.
|
|
58
|
+
* @param {RegExp} params.predefinedPatterns - The predefined patterns to match against the item's title and snippet.
|
|
59
|
+
* @param {RegExp} params.extraPatterns - Additional patterns to match against the item's title and snippet if no match is found with predefined patterns. These extra matching patterns are the patterns submitted by users and approved by the Living Atlas team.
|
|
60
|
+
* @param {string[]} params.customTerms - Custom terms to match against the item's title and snippet if no match is found with predefined or extra patterns. These custom terms are user-submitted via the Nominatim App.
|
|
61
|
+
* @param {Set<string>} params.rejectedSet - A set of terms rejected by the Living Atlas team. Any custom term found in this set will be ignored.
|
|
62
|
+
*
|
|
63
|
+
* @returns {MatchResult[]} - An array of match results (matched string plus the source of the pattern that matched) after deduplication and filtering.
|
|
64
|
+
*/
|
|
65
|
+
export declare const matchUsingMultiplePatterns: ({ item, predefinedPatterns, extraPatterns, customTerms, rejectedSet, }: MatchUsingMultiplePatternsParams) => MatchResult[];
|
|
66
|
+
/**
|
|
67
|
+
* Matches patterns in the title and snippet of an item.
|
|
68
|
+
* @param item ArcGIS item object to be matched.
|
|
69
|
+
* @param regex Regular expression to match.
|
|
70
|
+
* @returns Array of match results, each holding the matched string and its pattern source.
|
|
71
|
+
*/
|
|
72
|
+
export declare const matchPatternsInTitleAndSnippet: (item: IItem, regex: RegExp, patternSource: MatchPatternSource) => MatchResult[];
|
|
73
|
+
/**
|
|
74
|
+
* Matches custom patterns in the title and snippet of an item.
|
|
75
|
+
* @param item ArcGIS item object to be matched.
|
|
76
|
+
* @param customMatchingPatterns Array of custom patterns to match.
|
|
77
|
+
* @param rejectedSet Set of rejected patterns.
|
|
78
|
+
*
|
|
79
|
+
* @returns Array of match results, each holding the matched string and its pattern source.
|
|
80
|
+
*/
|
|
81
|
+
export declare const matchWithCustomPatterns: (item: IItem, customMatchingPatterns: string[], rejectedSet: Set<string>) => MatchResult[];
|
|
82
|
+
/**
|
|
83
|
+
* Get a Set from an array of strings and convert all strings to lowercase.
|
|
84
|
+
* @param arr - The array of strings to convert to a Set
|
|
85
|
+
* @returns set - A Set containing the strings from the array
|
|
86
|
+
*/
|
|
87
|
+
export declare const toLowercaseSet: (arr: string[]) => Set<string>;
|
|
88
|
+
/**
|
|
89
|
+
* Determines if an item is eligible for checking title and snippet searchability.
|
|
90
|
+
*
|
|
91
|
+
* @param {IItem} item - The item to check.
|
|
92
|
+
* @returns {boolean} `true` if the item meets the criteria for checking title and snippet searchability, otherwise `false`.
|
|
93
|
+
*
|
|
94
|
+
* The criteria for eligibility are:
|
|
95
|
+
* 1. The item is a layer.
|
|
96
|
+
* 2. The item is English-based.
|
|
97
|
+
* 3. The item's culture is 'en' or the item's owner is one of the English-based owners.
|
|
98
|
+
*
|
|
99
|
+
* @remarks
|
|
100
|
+
* - The locale is determined by both the item's culture and the item's owner.
|
|
101
|
+
*/
|
|
102
|
+
export declare const isEligibleForCheckingTitleAndSnippetSearchability: (item: IItem) => boolean;
|
|
103
|
+
/**
|
|
104
|
+
* Clean up matched results by removing duplicates and overlapping strings.
|
|
105
|
+
* @param matchedResults array of matched results
|
|
106
|
+
* @returns an array of cleaned up matched MatchResult
|
|
107
|
+
*/
|
|
108
|
+
export declare const cleanUpMatchedResults: (matchedResults: MatchResult[]) => MatchResult[];
|
|
109
|
+
export declare const removeOverlapped: (arr: MatchResult[]) => MatchResult[];
|
|
110
|
+
/**
|
|
111
|
+
* Removes duplicate matched results from an array, ignoring case sensitivity.
|
|
112
|
+
*
|
|
113
|
+
* @param arr - The array of MatchResult to deduplicate.
|
|
114
|
+
* @returns A new array with duplicates removed, preserving the original case of the first occurrence.
|
|
115
|
+
*/
|
|
116
|
+
export declare const deduplicate: (arr: MatchResult[]) => MatchResult[];
|
|
117
|
+
export {};
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.deduplicate = exports.removeOverlapped = exports.cleanUpMatchedResults = exports.isEligibleForCheckingTitleAndSnippetSearchability = exports.toLowercaseSet = exports.matchWithCustomPatterns = exports.matchPatternsInTitleAndSnippet = exports.matchUsingMultiplePatterns = exports.isFoundInSet = void 0;
|
|
4
|
+
const isLayer_1 = require("../util/isLayer");
|
|
5
|
+
const getScoringRules_1 = require("../util/getScoringRules");
|
|
6
|
+
const createWordBoundaryRegex_1 = require("../util/createWordBoundaryRegex");
|
|
7
|
+
/**
 * Checks if a string is found in a set of strings.
 *
 * The lookup lower-cases `str` before calling `set.has`, so the set is
 * expected to already contain lower-case entries.
 *
 * @param str string to be checked
 * @param set set of strings to be checked against
 * @returns true if the lower-cased string is found in the set, false otherwise
 *          (including when `str` is empty or not a string, or when `set` is
 *          missing or has no `has` method)
 */
const isFoundInSet = (str, set) => {
    // A missing or non-Set-like container can never contain the string.
    if (!set || typeof set.has !== 'function') {
        return false;
    }
    // Only non-empty strings can be looked up; anything else would make
    // `toLowerCase` throw, so report "not found" instead.
    if (typeof str !== 'string' || str === '') {
        return false;
    }
    return set.has(str.toLowerCase());
};
|
|
23
|
+
exports.isFoundInSet = isFoundInSet;
|
|
24
|
+
/**
 * Looks for matches in an item's title and snippet, trying each pattern tier
 * in priority order and stopping at the first tier that yields any result:
 *   1. predefined patterns (reported with source `'predefined'`)
 *   2. extra patterns (reported with source `'additional'`)
 *   3. custom terms, filtered by the rejected set
 *
 * @param {MatchUsingMultiplePatternsParams} params - The parameters for pattern matching.
 * @param {IItem} params.item - The item containing the title and snippet to search for patterns.
 * @param {RegExp} params.predefinedPatterns - The predefined patterns to match against the item's title and snippet.
 * @param {RegExp} params.extraPatterns - Additional patterns, tried only if the predefined patterns produced no match. These are patterns submitted by users and approved by the Living Atlas team.
 * @param {string[]} params.customTerms - Custom terms, tried only if neither regex tier produced a match. These custom terms are user-submitted via the Nominatim App.
 * @param {Set<string>} params.rejectedSet - A set of terms rejected by the Living Atlas team; passed through to the custom-term matcher.
 *
 * @returns {MatchResult[]} - The match results of the first tier that matched; if neither regex tier matched, whatever the custom-term matcher returns.
 */
const matchUsingMultiplePatterns = ({ item, predefinedPatterns, extraPatterns, customTerms, rejectedSet, }) => {
    // Regex-based tiers, highest priority first.
    const regexTiers = [
        [predefinedPatterns, 'predefined'],
        [extraPatterns, 'additional'],
    ];
    for (const [tierPatterns, tierSource] of regexTiers) {
        const tierMatches = (0, exports.matchPatternsInTitleAndSnippet)(item, tierPatterns, tierSource);
        if (tierMatches.length) {
            return tierMatches;
        }
    }
    // Last resort: user-submitted custom terms.
    return (0, exports.matchWithCustomPatterns)(item, customTerms, rejectedSet);
};
|
|
51
|
+
exports.matchUsingMultiplePatterns = matchUsingMultiplePatterns;
|
|
52
|
+
/**
 * Matches patterns in the title and snippet of an item.
 *
 * The title and the snippet are searched independently (title first), so a
 * pattern can never produce a match that straddles the boundary between the
 * two fields. The collected results are passed through
 * `cleanUpMatchedResults` before being returned.
 *
 * @param item ArcGIS item object to be matched.
 * @param regex Regular expression to match. If it lacks the global flag, a
 *              global copy is used so that every occurrence is collected; the
 *              regex passed in is not replaced or recompiled otherwise.
 * @param patternSource Source label (`'predefined' | 'additional' | 'custom'`)
 *                      attached to every returned match result.
 * @returns Array of match results (`{ matchedString, patternSource }`);
 *          an empty array if the item has no title/snippet, no regex is given,
 *          or an error occurs while matching.
 */
const matchPatternsInTitleAndSnippet = (item, regex, patternSource) => {
    const title = (item === null || item === void 0 ? void 0 : item.title) || '';
    const snippet = (item === null || item === void 0 ? void 0 : item.snippet) || '';
    if (!title && !snippet) {
        return [];
    }
    if (!regex) {
        return [];
    }
    try {
        // String.prototype.match only returns all occurrences for global regexes.
        const globalRegex = regex.flags.includes('g')
            ? regex
            : new RegExp(regex.source, `${regex.flags}g`);
        const matchResults = [];
        // Search each field on its own: joining them with a newline would let
        // patterns containing `\s`, `\W` or negated classes match across the
        // title/snippet boundary and report text that exists in neither field.
        for (const fieldText of [title, snippet]) {
            if (!fieldText) {
                continue;
            }
            // String() keeps non-string field values (e.g. numbers) matchable.
            const fieldMatches = String(fieldText).match(globalRegex) || [];
            for (const matchedString of fieldMatches) {
                matchResults.push({ matchedString, patternSource });
            }
        }
        return (0, exports.cleanUpMatchedResults)(matchResults);
    }
    catch (error) {
        // Best effort: a broken pattern must not abort validation of the item.
        console.error('Error in matchPatternsInTitleAndSnippet:', error);
        return [];
    }
};
|
|
90
|
+
exports.matchPatternsInTitleAndSnippet = matchPatternsInTitleAndSnippet;
|
|
91
|
+
/**
 * Matches custom terms in the title and snippet of an item.
 *
 * Each term is trimmed; terms that are empty after trimming or that are
 * reported by `isFoundInSet` as present in `rejectedSet` are discarded. The
 * remaining terms are turned into a single regex by `createWordBoundaryRegex`.
 *
 * @param item ArcGIS item object to be matched.
 * @param customMatchingPatterns Array of custom terms to match.
 * @param rejectedSet Set of rejected terms.
 *
 * @returns Array of match results labelled with the `'custom'` pattern source; empty when no usable term remains.
 */
const matchWithCustomPatterns = (item, customMatchingPatterns, rejectedSet) => {
    if (!customMatchingPatterns || customMatchingPatterns.length === 0) {
        return [];
    }
    const usableTerms = [];
    for (const rawTerm of customMatchingPatterns) {
        const term = rawTerm.trim();
        if (term === '') {
            continue;
        }
        if ((0, exports.isFoundInSet)(term, rejectedSet) !== false) {
            continue;
        }
        usableTerms.push(term);
    }
    if (usableTerms.length === 0) {
        return [];
    }
    const termsRegex = (0, createWordBoundaryRegex_1.createWordBoundaryRegex)(usableTerms);
    return (0, exports.matchPatternsInTitleAndSnippet)(item, termsRegex, 'custom');
};
|
|
135
|
+
exports.matchWithCustomPatterns = matchWithCustomPatterns;
|
|
136
|
+
/**
 * Builds a Set from an array of strings, lowercasing every entry first.
 *
 * @param arr - The array of strings to convert; a missing or empty array yields an empty Set.
 * @returns set - A Set containing the lowercased strings from the array.
 */
const toLowercaseSet = (arr) => {
    const hasEntries = Boolean(arr) && arr.length !== 0;
    return new Set(hasEntries ? arr.map((entry) => entry.toLowerCase()) : []);
};
|
|
148
|
+
exports.toLowercaseSet = toLowercaseSet;
|
|
149
|
+
/**
 * Determines if an item is eligible for checking title and snippet searchability.
 *
 * @param item {IItem} - The item to check.
 * @returns {boolean} `true` if the item meets all criteria below, otherwise `false`.
 *
 * The criteria for eligibility are:
 * 1. An item is provided.
 * 2. `isLayer` reports the item as a layer.
 * 3. The locale resolved from the item's culture AND the locale resolved from
 *    the item's owner are both `'en'`.
 *
 * @remarks
 * - The locale is determined by both the item's culture and the item's owner.
 * - The title and snippet text are not inspected here. An earlier
 *   non-English character check was removed as too restrictive: some valid
 *   English-based content (like Esri Oceans data) contains special characters
 *   such as Greek letters while still being primarily English content.
 */
const isEligibleForCheckingTitleAndSnippetSearchability = (item) => {
    if (!item || !(0, isLayer_1.isLayer)(item)) {
        return false;
    }
    const cultureLocale = (0, getScoringRules_1.getScoringRulesLocale)(item.culture);
    const ownerLocale = (0, getScoringRules_1.getScoringRulesLocaleByItemOwner)(item.owner);
    return cultureLocale === 'en' && ownerLocale === 'en';
};
|
|
182
|
+
exports.isEligibleForCheckingTitleAndSnippetSearchability = isEligibleForCheckingTitleAndSnippetSearchability;
|
|
183
|
+
/**
 * Cleans up matched results by dropping blank entries, then removing
 * duplicates and overlapping strings.
 *
 * @param matchedResults array of matched results
 * @returns an array of cleaned up MatchResult; empty when nothing was passed in
 */
const cleanUpMatchedResults = (matchedResults) => {
    if (!matchedResults || matchedResults.length === 0) {
        return [];
    }
    // Entries whose matched text is empty or whitespace-only are discarded;
    // the text of the entries that remain is left exactly as matched.
    const nonBlank = matchedResults.filter((result) => result.matchedString.trim().length > 0);
    const unique = (0, exports.deduplicate)(nonBlank);
    return (0, exports.removeOverlapped)(unique);
};
|
|
198
|
+
exports.cleanUpMatchedResults = cleanUpMatchedResults;
|
|
199
|
+
/**
 * Removes results whose matched text is contained, ignoring case, in the
 * matched text of a result that has already been kept.
 *
 * Results are visited from the longest matched text to the shortest, so the
 * longest of a group of overlapping strings is the one that survives
 * (e.g. "New York" is kept and "York" is dropped).
 *
 * @param arr - The array of MatchResult to filter. It is not modified.
 * @returns A new array ordered by descending matched-string length with the
 * contained entries removed; an empty array when `arr` is missing or empty.
 */
const removeOverlapped = (arr) => {
    if (!arr || arr.length === 0) {
        return [];
    }
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the array owned by the caller.
    const byLengthDesc = [...arr].sort((a, b) => b.matchedString.length - a.matchedString.length);
    const kept = [];
    // Lowercased text of every kept result, computed once per result.
    const keptLowercased = [];
    for (const candidate of byLengthDesc) {
        const needle = candidate.matchedString.toLowerCase();
        if (keptLowercased.some((haystack) => haystack.includes(needle))) {
            continue;
        }
        kept.push(candidate);
        keptLowercased.push(needle);
    }
    return kept;
};
|
|
219
|
+
exports.removeOverlapped = removeOverlapped;
|
|
220
|
+
/**
 * Drops matched results whose matched string was already seen, comparing
 * case-insensitively.
 *
 * @param arr - The array of MatchResult to deduplicate.
 * @returns A new array holding the first occurrence of each matched string
 * (with its original casing); an empty array when `arr` is missing or empty.
 */
const deduplicate = (arr) => {
    const unique = [];
    if (!arr || arr.length === 0) {
        return unique;
    }
    const seenKeys = new Set();
    arr.forEach((result) => {
        const key = result.matchedString.toLowerCase();
        if (!seenKeys.has(key)) {
            seenKeys.add(key);
            unique.push(result);
        }
    });
    return unique;
};
|
|
240
|
+
exports.deduplicate = deduplicate;
|
|
241
|
+
//# sourceMappingURL=helpers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.js","sourceRoot":"","sources":["../../../src/lib/checkTitleAndSnippetSearchability/helpers.ts"],"names":[],"mappings":";;;AACA,6CAA0C;AAC1C,6DAGiC;AAEjC,6EAA0E;AAiD1E;;;;;GAKG;AACI,MAAM,YAAY,GAAG,CAAC,GAAW,EAAE,GAAgB,EAAE,EAAE;IAC1D,IAAI,CAAC,GAAG,EAAE;QACN,OAAO,KAAK,CAAC;KAChB;IAED,2CAA2C;IAE3C,IAAI,CAAC,GAAG,EAAE;QACN,OAAO,KAAK,CAAC;KAChB;IAED,OAAO,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;AACtC,CAAC,CAAC;AAZW,QAAA,YAAY,gBAYvB;AAEF;;;;;;;;;;;;GAYG;AACI,MAAM,0BAA0B,GAAG,CAAC,EACvC,IAAI,EACJ,kBAAkB,EAClB,aAAa,EACb,WAAW,EACX,WAAW,GACoB,EAAiB,EAAE;IAClD,8CAA8C;IAC9C,MAAM,2BAA2B,GAC7B,IAAA,sCAA8B,EAAC,IAAI,EAAE,kBAAkB,EAAE,YAAY,CAAC,CAAC;IAE3E,IAAI,2BAA2B,CAAC,MAAM,EAAE;QACpC,OAAO,2BAA2B,CAAC;KACtC;IAED,gDAAgD;IAChD,MAAM,2BAA2B,GAAG,IAAA,sCAA8B,EAC9D,IAAI,EACJ,aAAa,EACb,YAAY,CACf,CAAC;IAEF,IAAI,2BAA2B,CAAC,MAAM,EAAE;QACpC,OAAO,2BAA2B,CAAC;KACtC;IAED,8CAA8C;IAC9C,OAAO,IAAA,+BAAuB,EAAC,IAAI,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;AACnE,CAAC,CAAC;AA5BW,QAAA,0BAA0B,8BA4BrC;AAEF;;;;;GAKG;AACI,MAAM,8BAA8B,GAAG,CAC1C,IAAW,EACX,KAAa,EACb,aAAiC,EACpB,EAAE;IACf,MAAM,KAAK,GAAG,CAAA,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,KAAK,KAAI,EAAE,CAAC;IAChC,MAAM,OAAO,GAAG,CAAA,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAE,OAAO,KAAI,EAAE,CAAC;IAEpC,IAAI,CAAC,KAAK,IAAI,CAAC,OAAO,EAAE;QACpB,OAAO,EAAE,CAAC;KACb;IAED,IAAI,CAAC,KAAK,EAAE;QACR,OAAO,EAAE,CAAC;KACb;IAED,IAAI;QACA,kEAAkE;QAClE,sEAAsE;QAEtE,iCAAiC;QACjC,8BAA8B;QAC9B,gCAAgC;QAChC,MAAM;QAEN,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE;YAC5B,KAAK,GAAG,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC;SACvD;QAED,MAAM,QAAQ,GAAG,GAAG,KAAK,KAAK,OAAO,EAAE,CAAC;QACxC,MAAM,OAAO,GAAa,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;QAEtD,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;YACjD,aAAa;YACb,aAAa;SAChB,CAAC,CAAC,CAAC;QAEJ,OAAO,IAAA,6BAAqB,EAAC,YAAY,CAAC,CAAC;KAC9C;IAAC,OAAO,KAAK,EAAE;QACZ,OAAO,CAAC,KAAK,CAAC,0CAA0C,EAAE,KAAK,CAAC,CAAC;QACjE,OAAO,EAAE,CAAC;KACb;AACL,CAAC,CAAC;AA1CW,QAAA,8BAA8
B,kCA0CzC;AAEF;;;;;;;GAOG;AACI,MAAM,uBAAuB,GAAG,CACnC,IAAW,EACX,sBAAgC,EAChC,WAAwB,EAC1B,EAAE;IACA,IAAI,CAAC,sBAAsB,IAAI,sBAAsB,CAAC,MAAM,KAAK,CAAC,EAAE;QAChE,OAAO,EAAE,CAAC;KACb;IAED,yCAAyC;IACzC,yDAAyD;IACzD,4DAA4D;IAC5D,sBAAsB,GAAG,sBAAsB;SAC1C,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;SAChC,MAAM,CACH,CAAC,OAAO,EAAE,EAAE,CACR,OAAO,KAAK,EAAE,IAAI,IAAA,oBAAY,EAAC,OAAO,EAAE,WAAW,CAAC,KAAK,KAAK,CACrE,CAAC;IAEN,IAAI,sBAAsB,CAAC,MAAM,KAAK,CAAC,EAAE;QACrC,OAAO,EAAE,CAAC;KACb;IAED,yDAAyD;IACzD,6BAA6B;IAC7B,KAAK;IAEL,0CAA0C;IAC1C,oDAAoD;IACpD,WAAW;IACX,KAAK;IAEL,sEAAsE;IACtE,0EAA0E;IAE1E,sEAAsE;IACtE,iBAAiB;IACjB,IAAI;IACJ,8DAA8D;IAC9D,kEAAkE;IAElE,+BAA+B;IAC/B,8BAA8B;IAC9B,gCAAgC;IAChC,MAAM;IACN,oCAAoC;IAEpC,mCAAmC;IAEnC,MAAM,mBAAmB,GAAG,IAAA,iDAAuB,EAAC,sBAAsB,CAAC,CAAC;IAE5E,OAAO,IAAA,sCAA8B,EAAC,IAAI,EAAE,mBAAmB,EAAE,QAAQ,CAAC,CAAC;AAC/E,CAAC,CAAC;AApDW,QAAA,uBAAuB,2BAoDlC;AAEF;;;;GAIG;AACI,MAAM,cAAc,GAAG,CAAC,GAAa,EAAe,EAAE;IACzD,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE;QAC1B,OAAO,IAAI,GAAG,EAAE,CAAC;KACpB;IAED,MAAM,IAAI,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;IAE7C,OAAO,IAAI,GAAG,CAAS,IAAI,CAAC,CAAC;AACjC,CAAC,CAAC;AARW,QAAA,cAAc,kBAQzB;AAEF;;;;;;;;;;;;;GAaG;AACI,MAAM,iDAAiD,GAAG,CAC7D,IAAW,EACJ,EAAE;IACT,IAAI,CAAC,IAAI,EAAE;QACP,OAAO,KAAK,CAAC;KAChB;IAED,MAAM,WAAW,GAAG,IAAA,iBAAO,EAAC,IAAI,CAAC,CAAC;IAElC,IAAI,CAAC,WAAW,EAAE;QACd,OAAO,KAAK,CAAC;KAChB;IAED,MAAM,sBAAsB,GAAG,IAAA,uCAAqB,EAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEnE,MAAM,iBAAiB,GAAG,IAAA,kDAAgC,EAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAEvE,mCAAmC;IACnC,uCAAuC;IAEvC,2EAA2E;IAC3E,0FAA0F;IAC1F,qEAAqE;IACrE,MAAM,gBAAgB,GAClB,sBAAsB,KAAK,IAAI,IAAI,iBAAiB,KAAK,IAAI,CAAC;IAClE,oEAAoE;IAEpE,OAAO,gBAAgB,CAAC;AAC5B,CAAC,CAAC;AA5BW,QAAA,iDAAiD,qDA4B5D;AAEF;;;;GAIG;AACI,MAAM,qBAAqB,GAAG,CAAC,cAA6B,EAAE,EAAE;IACnE,IAAI,CAAC,cAAc,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE;QAChD,OAAO,EAAE,CAAC;KACb;IAED,yCAAyC;IACzC,mCAAmC;IACnC,cAAc,GAAG,cAAc,CAAC,MAAM,CAClC,CAAC,CAAC,EAAE,EAAE
,CAAC,CAAC,CAAC,aAAa,CAAC,IAAI,EAAE,KAAK,EAAE,CACvC,CAAC;IAEF,MAAM,UAAU,GAAkB,IAAA,mBAAW,EAAC,cAAc,CAAC,CAAC;IAE9D,OAAO,IAAA,wBAAgB,EAAC,UAAU,CAAC,CAAC;AACxC,CAAC,CAAC;AAdW,QAAA,qBAAqB,yBAchC;AAEK,MAAM,gBAAgB,GAAG,CAAC,GAAkB,EAAiB,EAAE;IAClE,MAAM,MAAM,GAAkB,EAAE,CAAC;IAEjC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;IAEpE,KAAK,MAAM,SAAS,IAAI,GAAG,EAAE;QACzB,IAAI,gBAAgB,GAAG,KAAK,CAAC;QAE7B,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE;YACvB,IACI,IAAI,CAAC,aAAa;iBACb,WAAW,EAAE;iBACb,QAAQ,CAAC,SAAS,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,EACtD;gBACE,gBAAgB,GAAG,IAAI,CAAC;gBACxB,MAAM;aACT;SACJ;QAED,IAAI,gBAAgB,EAAE;YAClB,SAAS;SACZ;QAED,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;KAC1B;IAED,OAAO,MAAM,CAAC;AAClB,CAAC,CAAC;AA3BW,QAAA,gBAAgB,oBA2B3B;AAEF;;;;;GAKG;AACI,MAAM,WAAW,GAAG,CAAC,GAAkB,EAAiB,EAAE;IAC7D,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE;QAC1B,OAAO,EAAE,CAAC;KACb;IAED,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAE/B,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QACpB,MAAM,QAAQ,GAAG,CAAC,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE;YACpB,OAAO,KAAK,CAAC;SAChB;QACD,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnB,OAAO,IAAI,CAAC;IAChB,CAAC,CAAC,CAAC;AACP,CAAC,CAAC;AAfW,QAAA,WAAW,eAetB"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { IItem } from '../../types';
|
|
2
|
+
import { MatchResult } from './helpers';
|
|
3
|
+
/**
|
|
4
|
+
* Sets the extra matching patterns and rejected patterns for date-time information.
|
|
5
|
+
*
|
|
6
|
+
* @param extraMatchingPatterns - An array of strings representing the extra matching patterns.
|
|
7
|
+
* @param extraRejectedPatterns - An array of strings representing the extra rejected patterns.
|
|
8
|
+
* @param {boolean} [shouldAvoidUsingWordBoundary=false] - A boolean indicating whether to avoid using word boundaries (useful for languages like Chinese and Japanese).
|
|
9
|
+
*
|
|
10
|
+
* @remarks
|
|
11
|
+
* - For languages like Japanese that don't use spaces between words (or where word boundaries are not easily determined by spaces or punctuation),
|
|
12
|
+
* set `shouldAvoidUsingWordBoundary` to true to remove `\b` and use lookarounds to ensure correct boundaries.
|
|
13
|
+
*/
|
|
14
|
+
export declare const setExtraDateTimeInfoMatchingPatterns: (extraMatchingPatterns: string[], extraRejectedPatterns: string[], shouldAvoidUsingWordBoundary?: boolean) => void;
|
|
15
|
+
/**
|
|
16
|
+
* Matches date and time information from the title and snippet of an item.
|
|
17
|
+
*
|
|
18
|
+
* @param {IItem} item - The item containing title and snippet to be matched against.
|
|
19
|
+
* @param {string[]} [customMatchingPattenrs] - Optional custom matching patterns to be included in the regex.
|
|
20
|
+
* @returns {MatchResult[]} - An array of match results for the date and time information found.
|
|
21
|
+
*/
|
|
22
|
+
export declare const matchDateTimeInfo: (item: IItem, customMatchingPattenrs?: string[]) => MatchResult[];
|
|
23
|
+
/**
|
|
24
|
+
* Check if the provided string contains recognized date and time information.
|
|
25
|
+
* @param str - The string to check.
|
|
26
|
+
* @returns {boolean} - True if the string contains recognized date and time information, false otherwise.
|
|
27
|
+
*/
|
|
28
|
+
export declare const isRecognizedDateTimeInfo: (str: string) => boolean;
|
|
29
|
+
/**
|
|
30
|
+
* Determines if a given string is rejected date and time information.
|
|
31
|
+
* @param str - The string to check.
|
|
32
|
+
* @returns {boolean} - True if the string is rejected date and time information, false otherwise.
|
|
33
|
+
*/
|
|
34
|
+
export declare const isRejectedDateTimeInfo: (str: string) => boolean;
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.isRejectedDateTimeInfo = exports.isRecognizedDateTimeInfo = exports.matchDateTimeInfo = exports.setExtraDateTimeInfoMatchingPatterns = void 0;
|
|
7
|
+
const helpers_1 = require("./helpers");
|
|
8
|
+
const TitleSummaryMatchingPatterns_Time_json_1 = __importDefault(require("../../data/TitleSummaryMatchingPatterns_Time.json"));
|
|
9
|
+
const TitleSummaryRejectedPatterns_json_1 = __importDefault(require("../../data/TitleSummaryRejectedPatterns.json"));
|
|
10
|
+
const createWordBoundaryRegex_1 = require("../util/createWordBoundaryRegex");
|
|
11
|
+
/**
 * Match time intervals relative to the present moment rather than specifying an absolute date or time,
 * e.g. 'last 7 days', 'past 48 hours', 'current year', 'every 3 months'.
 * - `\b`: Word boundary to ensure we match whole words and not substrings.
 * - `(?:(last|past|next|current|latest|every)\s+)`: A REQUIRED prefix word ("last", "past", "next", "current", "latest" or "every") followed by one or more spaces (\s+). The group is not optional, so a bare unit such as "year" does not match.
 * - `(\d+)?`: An optional capturing group to match one or more digits. This covers patterns like "7", "14", "48", etc.
 * - `\s*`: Zero or more spaces. This handles cases where there might or might not be spaces between the number and the time unit.
 * - `(year|month|week|day|hour|minute|quarter|calendar year)s?`: A capturing group to match the time units: "year", "month", "week", "day", "hour", "minute", "quarter" and "calendar year". The s? makes the "s" optional to handle both singular and plural forms (e.g., "day" and "days").
 * - Flags `gi`: all occurrences, case-insensitive.
 */
const relativeDateTimeRegex = /\b(?:(last|past|next|current|latest|every)\s+)(\d+)?\s*(year|month|week|day|hour|minute|quarter|calendar year)s?\b/gi;
/**
 * A regular expression that matches time duration: e.g. '10-months', '5 Year', '3 hours'
 * (digits, an optional hyphen, optional whitespace, then a time unit with an optional plural "s").
 */
const timeDurationRegEx = /\b(\d+)(?:-)?\s*(year|month|week|day|hour|minute)s?\b/gi;
/**
 * A regular expression that matches patterns like "3-5 years" or "3 to 5 years".
 * Here's a breakdown of the regular expression:
 *
 * - `\b`: Word boundary to ensure we match whole words and not substrings.
 * - `(\d+)`: A capturing group to match one or more digits.
 * - `\s*`: Zero or more spaces.
 * - `(?:-|to)`: A non-capturing group to match a hyphen (-) or the word "to".
 * - `\s*`: Zero or more spaces.
 * - `(\d+)`: A capturing group to match one or more digits.
 * - `\s*`: Zero or more spaces.
 * - `(year|month|week|day|hour|minute)s?`: A capturing group to match the time units: "year", "month", "week", "day", "hour", and "minute". The s? makes the "s" optional to handle both singular and plural forms (e.g., "day" and "days").
 */
const rangeWithUnitsRegEx = /\b(\d+)\s*(?:-|to)\s*(\d+)\s*(year|month|week|day|hour|minute)s?\b/gi;
/**
 * A regular expression that matches the provided year-related patterns. e.g. '2022', '2020-2024', '1950 to 2050'
 * Here's a breakdown of the regular expression:
 *
 * - `\b`: Word boundary to ensure we match whole words and not substrings.
 * - `(?:\d{4}|present)`: A non-capturing group to match either a four-digit year (\d{4}) or the word "present".
 * - `(?:\s*(?:-|and|to)\s*(?:\d{4}|present))?`: An optional non-capturing group to match a range or conjunction with:
 *   - `(?:-|and|to)`: A non-capturing group to match a hyphen (-), the word "and", or the word "to".
 *   - `(?:\d{4}|present)`: A non-capturing group to match either a four-digit year or the word "present".
 * - `\b`: Word boundary to ensure we match whole words and not substrings.
 *
 * Because the range part is optional, any standalone four-digit number and
 * the standalone word "present" also match.
 */
const yearRangeRegEx = /\b(?:\d{4}|present)(?:\s*(?:-|and|to)\s*(?:\d{4}|present))?\b/gi;
/**
 * A regular expression that matches month names and abbreviations.
 * Matches full month names (e.g., January, February) and common abbreviations (e.g., Jan, Feb).
 * Matching is case-insensitive and on whole words, so ordinary words that
 * spell a month name (e.g. "may", "march") match as well.
 */
const monthRegEx = /\b(?:january|february|march|april|may|june|july|august|september|october|november|december|jan|feb|mar|apr|jun|jul|aug|sep|sept|oct|nov|dec)\b/gi;
/**
 * A regular expression that matches weekday names and abbreviations.
 * Matches full weekday names (e.g., Monday, Tuesday) and common abbreviations (e.g., Mon, Tue).
 * Matching is case-insensitive and on whole words, so ordinary words that
 * spell an abbreviation (e.g. "sat", "sun") match as well.
 */
const weekdayRegEx = /\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday|mon|tue|wed|thu|fri|sat|sun)\b/gi;
/**
 * Match text like '20th century': one or two digits, an ordinal suffix
 * (st, nd, rd or th), whitespace, then the word "century".
 */
const centuryRegEx = /\b(\d{1,2}(?:st|nd|rd|th)\s+century)\b/gi;
/**
 * Additional patterns for matching date and time information.
 * This includes phrases like 'once a day', 'monthly', 'daily', etc.
 */
const additionalPatterns = /\b(once\s+a\s+day|monthly|daily|hourly|weekly|bi-weekly|quarterly|semi-annually|annual|annually)\b/gi;
|
|
69
|
+
// const dateTimeMatchingPatterns = TitleSummaryMatchingPatterns.time;
// Regex built once at module load from the terms in
// TitleSummaryMatchingPatterns_Time.json; passed to the shared matcher as the
// "predefined" patterns.
const regex4DateTimeInfo = (0, createWordBoundaryRegex_1.createWordBoundaryRegex)(TitleSummaryMatchingPatterns_Time_json_1.default);
/**
 * Set of rejected date and time information.
 * Starts as `null` and is assigned by `createRejectedDateTimeInfoSet`.
 */
let rejectedDateTimeInfoSet = null; //new Set<string>(TitleSummaryMatchingPatterns.topics)
/**
 * Regular expression to match additional date and time information.
 * Starts as `null` and is assigned by `setExtraDateTimeInfoMatchingPatterns`
 * when it receives a non-empty list of extra matching patterns.
 */
let regexForExtraDateTimeInfo = null;
|
|
79
|
+
/**
 * Rebuilds the module-level set of rejected date and time information.
 *
 * The set is built with `toLowercaseSet` from the `common` and `time` lists of
 * TitleSummaryRejectedPatterns.json followed by the extra terms supplied here.
 *
 * @param extraRejectedPatternsForDateTimeInfo - Extra rejected terms to add.
 * `undefined`, `null` or any other falsy value means "no extra terms".
 */
const createRejectedDateTimeInfoSet = (extraRejectedPatternsForDateTimeInfo = []) => {
    // The default parameter only covers `undefined`; a `null` forwarded by a
    // caller would make the spread below throw, so normalise it here.
    const extraRejected = extraRejectedPatternsForDateTimeInfo || [];
    rejectedDateTimeInfoSet = (0, helpers_1.toLowercaseSet)([
        ...TitleSummaryRejectedPatterns_json_1.default.common,
        ...TitleSummaryRejectedPatterns_json_1.default.time,
        ...extraRejected,
    ]);
};
|
|
90
|
+
/**
 * Configures the extra matching patterns and the rejected patterns used for date-time information.
 *
 * @param extraMatchingPatterns - An array of strings representing the extra matching patterns. When it is missing or empty, the regex built by an earlier call is left as it is.
 * @param extraRejectedPatterns - An array of strings representing the extra rejected patterns. The rejected set is rebuilt on every call.
 * @param {boolean} [shouldAvoidUsingWordBoundary=false] - A boolean indicating whether to avoid using word boundaries (useful for languages like Chinese and Japanese).
 *
 * @remarks
 * - For languages like Japanese that don't use spaces between words (or where word boundaries are not easily determined by spaces or punctuation),
 * set `shouldAvoidUsingWordBoundary` to true; the flag is forwarded to `createWordBoundaryRegex`.
 */
const setExtraDateTimeInfoMatchingPatterns = (extraMatchingPatterns, extraRejectedPatterns, shouldAvoidUsingWordBoundary = false) => {
    const hasExtraPatterns = Boolean(extraMatchingPatterns && extraMatchingPatterns.length);
    if (hasExtraPatterns) {
        const regexOptions = { shouldAvoidUsingWordBoundary };
        regexForExtraDateTimeInfo = (0, createWordBoundaryRegex_1.createWordBoundaryRegex)(extraMatchingPatterns, regexOptions);
    }
    createRejectedDateTimeInfoSet(extraRejectedPatterns);
};
|
|
109
|
+
exports.setExtraDateTimeInfoMatchingPatterns = setExtraDateTimeInfoMatchingPatterns;
|
|
110
|
+
/**
 * Matches date and time information from the title and snippet of an item.
 *
 * Two sources of matches are combined and then cleaned up together:
 * 1. the built-in date/time regexes of this module, each applied to the title
 *    and snippet joined by a newline and labelled `'predefined'`;
 * 2. the term-based matcher `matchUsingMultiplePatterns`, fed with the
 *    predefined term regex, the configured extra regex, the custom terms and
 *    the rejected set.
 *
 * @param {IItem} item - The item containing title and snippet to be matched against. A missing item is treated as having an empty title and snippet.
 * @param {string[]} [customMatchingPattenrs] - Optional custom matching patterns to be included in the regex.
 * @returns {MatchResult[]} - An array of match results (`matchedString` plus `patternSource`) for the date and time information found.
 */
const matchDateTimeInfo = (item, customMatchingPattenrs) => {
    // Do not throw on a missing item; fall back to empty text instead.
    const source = item || {};
    const title = source.title || '';
    const snippet = source.snippet || '';
    const regex4TimeInfo = [
        relativeDateTimeRegex,
        timeDurationRegEx,
        yearRangeRegEx,
        centuryRegEx,
        additionalPatterns,
        rangeWithUnitsRegEx,
        monthRegEx,
        weekdayRegEx,
    ];
    const combined = `${title}\n${snippet}`;
    const matchResultsByCommonPatterns = [];
    for (const regex of regex4TimeInfo) {
        for (const matchedString of combined.match(regex) || []) {
            matchResultsByCommonPatterns.push({
                matchedString,
                patternSource: 'predefined',
            });
        }
    }
    // The rejected set is created lazily on first use.
    if (!rejectedDateTimeInfoSet) {
        createRejectedDateTimeInfoSet();
    }
    const matchedByTerms = (0, helpers_1.matchUsingMultiplePatterns)({
        item,
        predefinedPatterns: regex4DateTimeInfo,
        extraPatterns: regexForExtraDateTimeInfo,
        customTerms: customMatchingPattenrs,
        rejectedSet: rejectedDateTimeInfoSet,
    });
    return (0, helpers_1.cleanUpMatchedResults)([
        ...matchResultsByCommonPatterns,
        ...matchedByTerms,
    ]);
};
|
|
181
|
+
exports.matchDateTimeInfo = matchDateTimeInfo;
|
|
182
|
+
/**
 * Checks whether a string contains recognized date and time information by
 * running `matchDateTimeInfo` on an item whose title is the string and whose
 * snippet is empty.
 *
 * @param str - The string to check.
 * @returns {boolean} - True if at least one match is found, false otherwise.
 */
const isRecognizedDateTimeInfo = (str) => {
    const probeItem = { title: str, snippet: '' };
    return (0, exports.matchDateTimeInfo)(probeItem).length > 0;
};
|
|
194
|
+
exports.isRecognizedDateTimeInfo = isRecognizedDateTimeInfo;
|
|
195
|
+
/**
 * Determines if a given string is rejected date and time information, i.e.
 * whether `isFoundInSet` finds it in the rejected date/time set.
 *
 * The rejected set is created on first use if it does not exist yet.
 *
 * @param str - The string to check.
 * @returns {boolean} - The result of `isFoundInSet` for the string and the rejected set.
 */
const isRejectedDateTimeInfo = (str) => {
    const needsInit = !rejectedDateTimeInfoSet;
    if (needsInit) {
        createRejectedDateTimeInfoSet();
    }
    return (0, helpers_1.isFoundInSet)(str, rejectedDateTimeInfoSet);
};
|
|
206
|
+
exports.isRejectedDateTimeInfo = isRejectedDateTimeInfo;
|
|
207
|
+
//# sourceMappingURL=matchDateTimeInfo.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"matchDateTimeInfo.js","sourceRoot":"","sources":["../../../src/lib/checkTitleAndSnippetSearchability/matchDateTimeInfo.ts"],"names":[],"mappings":";;;;;;AACA,uCAOmB;AACnB,+HAAyF;AACzF,qHAAwF;AACxF,6EAA0E;AAE1E;;;;;;;GAOG;AACH,MAAM,qBAAqB,GACvB,sHAAsH,CAAC;AAE3H;;GAEG;AACH,MAAM,iBAAiB,GACnB,yDAAyD,CAAC;AAE9D;;;;;;;;;;;;GAYG;AACH,MAAM,mBAAmB,GACrB,sEAAsE,CAAC;AAE3E;;;;;;;;;;GAUG;AACH,MAAM,cAAc,GAChB,iEAAiE,CAAC;AAEtE;;;GAGG;AACH,MAAM,UAAU,GACZ,kJAAkJ,CAAC;AAEvJ;;;GAGG;AACH,MAAM,YAAY,GACd,gGAAgG,CAAC;AAErG;;GAEG;AACH,MAAM,YAAY,GAAG,0CAA0C,CAAC;AAEhE;;;GAGG;AACH,MAAM,kBAAkB,GACpB,sGAAsG,CAAC;AAE3G,sEAAsE;AAEtE,MAAM,kBAAkB,GAAG,IAAA,iDAAuB,EAAC,gDAAwB,CAAC,CAAC;AAE7E;;GAEG;AACH,IAAI,uBAAuB,GAAgB,IAAI,CAAC,CAAC,sDAAsD;AAEvG;;GAEG;AACH,IAAI,yBAAyB,GAAW,IAAI,CAAC;AAE7C;;;GAGG;AACH,MAAM,6BAA6B,GAAG,CAClC,uCAAiD,EAAE,EACrD,EAAE;IACA,uBAAuB,GAAG,IAAA,wBAAc,EAAC;QACrC,GAAG,2CAA4B,CAAC,MAAM;QACtC,GAAG,2CAA4B,CAAC,IAAI;QACpC,GAAG,oCAAoC;KAC1C,CAAC,CAAC;AACP,CAAC,CAAC;AAEF;;;;;;;;;;GAUG;AACI,MAAM,oCAAoC,GAAG,CAChD,qBAA+B,EAC/B,qBAA+B,EAC/B,4BAA4B,GAAG,KAAK,EACtC,EAAE;IACA,IAAI,qBAAqB,IAAI,qBAAqB,CAAC,MAAM,EAAE;QACvD,yBAAyB,GAAG,IAAA,iDAAuB,EAC/C,qBAAqB,EACrB;YACI,4BAA4B;SAC/B,CACJ,CAAC;KACL;IAED,6BAA6B,CAAC,qBAAqB,CAAC,CAAC;AACzD,CAAC,CAAC;AAfW,QAAA,oCAAoC,wCAe/C;AAEF;;;;;;GAMG;AACI,MAAM,iBAAiB,GAAG,CAC7B,IAAW,EACX,sBAAiC,EACpB,EAAE;IACf,yCAAyC;IAEzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;IAEnC,MAAM,cAAc,GAAG;QACnB,qBAAqB;QACrB,iBAAiB;QACjB,cAAc;QACd,YAAY;QACZ,kBAAkB;QAClB,mBAAmB;QACnB,UAAU;QACV,YAAY;KACf,CAAC;IAEF,0DAA0D;IAC1D,qDAAqD;IAErD,MAAM,2BAA2B,GAAa,EAAE,CAAC;IAEjD,MAAM,QAAQ,GAAG,GAAG,KAAK,KAAK,OAAO,EAAE,CAAC;IAExC,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE;QAChC,wEAAwE;QACxE,gDAAgD;QAChD,kCAAkC;QAElC,kDAAkD;QAClD,8CAA8C;QAC9C,IAAI;QAEJ,oDAAoD;QACpD,8CAA8C;QAC9C,IAAI;QAEJ,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;QAE5C,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE;YACzB,2BAA2B,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;
SAC3C;QAED,oBAAoB;QACpB,oCAAoC;QACpC,sCAAsC;QACtC,KAAK;KACR;IAED,MAAM,4BAA4B,GAC9B,2BAA2B,CAAC,GAAG,CAAC,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;QAChD,aAAa;QACb,aAAa,EAAE,YAAY;KAC9B,CAAC,CAAC,CAAC;IAER,IAAI,CAAC,uBAAuB,EAAE;QAC1B,6BAA6B,EAAE,CAAC;KACnC;IAED,MAAM,cAAc,GAAG,IAAA,oCAA0B,EAAC;QAC9C,IAAI;QACJ,kBAAkB,EAAE,kBAAkB;QACtC,aAAa,EAAE,yBAAyB;QACxC,WAAW,EAAE,sBAAsB;QACnC,WAAW,EAAE,uBAAuB;KACvC,CAAC,CAAC;IAEH,OAAO,IAAA,+BAAqB,EAAC;QACzB,GAAG,4BAA4B;QAC/B,GAAG,cAAc;KACpB,CAAC,CAAC;IAEH,4BAA4B;IAC5B,sCAAsC;IACtC,gBAAgB;IAChB,kCAAkC;IAClC,mCAAmC;IACnC,SAAS;IACT,IAAI;IAEJ,4CAA4C;AAChD,CAAC,CAAC;AApFW,QAAA,iBAAiB,qBAoF5B;AAEF;;;;GAIG;AACI,MAAM,wBAAwB,GAAG,CAAC,GAAW,EAAW,EAAE;IAC7D,MAAM,YAAY,GAAG,IAAA,yBAAiB,EAAC;QACnC,KAAK,EAAE,GAAG;QACV,OAAO,EAAE,EAAE;KACL,CAAC,CAAC;IAEZ,OAAO,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;AACnC,CAAC,CAAC;AAPW,QAAA,wBAAwB,4BAOnC;AAEF;;;;GAIG;AACI,MAAM,sBAAsB,GAAG,CAAC,GAAW,EAAW,EAAE;IAC3D,IAAI,CAAC,uBAAuB,EAAE;QAC1B,6BAA6B,EAAE,CAAC;KACnC;IAED,OAAO,IAAA,sBAAY,EAAC,GAAG,EAAE,uBAAuB,CAAC,CAAC;AACtD,CAAC,CAAC;AANW,QAAA,sBAAsB,0BAMjC"}
|