resuml 1.12.0 → 1.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +89 -8
- package/dist/index.cjs.map +1 -1
- package/dist/mcp/server.cjs +89 -8
- package/dist/mcp/server.cjs.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -762,7 +762,71 @@ var init_en = __esm({
|
|
|
762
762
|
"process",
|
|
763
763
|
"robust",
|
|
764
764
|
"consistent",
|
|
765
|
-
"operations"
|
|
765
|
+
"operations",
|
|
766
|
+
// URL/email/domain fragments
|
|
767
|
+
"http",
|
|
768
|
+
"https",
|
|
769
|
+
"www",
|
|
770
|
+
"com",
|
|
771
|
+
"org",
|
|
772
|
+
"net",
|
|
773
|
+
"mailto",
|
|
774
|
+
// Resume/YAML schema field names (in case raw YAML is pasted)
|
|
775
|
+
"name",
|
|
776
|
+
"keywords",
|
|
777
|
+
"highlights",
|
|
778
|
+
"startdate",
|
|
779
|
+
"enddate",
|
|
780
|
+
"website",
|
|
781
|
+
"profiles",
|
|
782
|
+
"basics",
|
|
783
|
+
"position",
|
|
784
|
+
"institution",
|
|
785
|
+
"studytype",
|
|
786
|
+
"fluency",
|
|
787
|
+
"issuer",
|
|
788
|
+
"network",
|
|
789
|
+
"username",
|
|
790
|
+
"countrycode",
|
|
791
|
+
"region",
|
|
792
|
+
// Generic nouns that aren't skills
|
|
793
|
+
"product",
|
|
794
|
+
"company",
|
|
795
|
+
"service",
|
|
796
|
+
"services",
|
|
797
|
+
"platform",
|
|
798
|
+
"solutions",
|
|
799
|
+
"ability",
|
|
800
|
+
"opportunity",
|
|
801
|
+
"candidate",
|
|
802
|
+
"applicant",
|
|
803
|
+
"position",
|
|
804
|
+
"salary",
|
|
805
|
+
"compensation",
|
|
806
|
+
"benefits",
|
|
807
|
+
"perks",
|
|
808
|
+
"bonus",
|
|
809
|
+
"development",
|
|
810
|
+
"management",
|
|
811
|
+
"knowledge",
|
|
812
|
+
"modern",
|
|
813
|
+
"advanced",
|
|
814
|
+
"practices",
|
|
815
|
+
"nice",
|
|
816
|
+
"technologies",
|
|
817
|
+
"technology",
|
|
818
|
+
"frameworks",
|
|
819
|
+
"framework",
|
|
820
|
+
"tools",
|
|
821
|
+
"data",
|
|
822
|
+
"based",
|
|
823
|
+
"contribute",
|
|
824
|
+
"contributions",
|
|
825
|
+
"migration",
|
|
826
|
+
"leading",
|
|
827
|
+
"source",
|
|
828
|
+
"visit",
|
|
829
|
+
"join"
|
|
766
830
|
]
|
|
767
831
|
};
|
|
768
832
|
en_default = en;
|
|
@@ -1333,8 +1397,20 @@ var init_genericChecks = __esm({
|
|
|
1333
1397
|
});
|
|
1334
1398
|
|
|
1335
1399
|
// src/ats/jdMatcher.ts
|
|
1400
|
+
function stripNoise(text) {
|
|
1401
|
+
return text.replace(/https?:\/\/[^\s]+/gi, " ").replace(/www\.[^\s]+/gi, " ").replace(/[\w.+-]+@[\w.-]+\.[a-z]{2,}/gi, " ").replace(/(?:^|\s)\/[\w/.-]+/g, " ").replace(/\b[a-z]+[A-Z][a-zA-Z]*\b/g, (match2) => {
|
|
1402
|
+
return match2.replace(/([a-z])([A-Z])/g, "$1 $2");
|
|
1403
|
+
});
|
|
1404
|
+
}
|
|
1336
1405
|
function tokenize(text, stopWords) {
|
|
1337
|
-
return text.toLowerCase().replace(/[^a-zA-Z0-9äöüßÄÖÜàáâãéèêëíìîïóòôõúùûüñç\s/+-]/g, " ").split(/\s+/).filter((word) =>
|
|
1406
|
+
return text.toLowerCase().replace(/[^a-zA-Z0-9äöüßÄÖÜàáâãéèêëíìîïóòôõúùûüñç\s/+-]/g, " ").split(/\s+/).filter((word) => {
|
|
1407
|
+
if (word.length <= 2) return false;
|
|
1408
|
+
if (stopWords.has(word)) return false;
|
|
1409
|
+
if (word.startsWith("//") || word.startsWith("http")) return false;
|
|
1410
|
+
if (/^\d+$/.test(word)) return false;
|
|
1411
|
+
if (/^[/+-]+$/.test(word)) return false;
|
|
1412
|
+
return true;
|
|
1413
|
+
});
|
|
1338
1414
|
}
|
|
1339
1415
|
function simpleStem(word, language) {
|
|
1340
1416
|
if (language === "de") {
|
|
@@ -1496,12 +1572,17 @@ function extractBrandNames(text) {
|
|
|
1496
1572
|
function extractKeywords(text, language, maxKeywords = 30) {
|
|
1497
1573
|
const langData = getLanguageData(language);
|
|
1498
1574
|
const stopWords = new Set(langData.stopWords);
|
|
1499
|
-
const
|
|
1575
|
+
const cleanText = stripNoise(text);
|
|
1576
|
+
const compoundTerms = extractCompoundTerms(cleanText);
|
|
1500
1577
|
const brandNames = extractBrandNames(text);
|
|
1501
|
-
const { requirementText
|
|
1502
|
-
const
|
|
1503
|
-
|
|
1504
|
-
|
|
1578
|
+
const { requirementText } = splitJdSections(cleanText);
|
|
1579
|
+
const hasRequirementSections = requirementText.trim().length > 0;
|
|
1580
|
+
let allTokens;
|
|
1581
|
+
if (hasRequirementSections) {
|
|
1582
|
+
allTokens = tokenize(requirementText, stopWords).filter((t) => !brandNames.has(t));
|
|
1583
|
+
} else {
|
|
1584
|
+
allTokens = tokenize(cleanText, stopWords).filter((t) => !brandNames.has(t));
|
|
1585
|
+
}
|
|
1505
1586
|
const stemmed = allTokens.map((t) => simpleStem(t, language));
|
|
1506
1587
|
const tf = buildTfMap(stemmed);
|
|
1507
1588
|
const stemToOriginal = /* @__PURE__ */ new Map();
|
|
@@ -1515,7 +1596,7 @@ function extractKeywords(text, language, maxKeywords = 30) {
|
|
|
1515
1596
|
const compoundWordSet = new Set(compoundsFlat);
|
|
1516
1597
|
const singleKeywords = [...tf.entries()].filter(([stem]) => stem.length > 2).filter(([stem]) => {
|
|
1517
1598
|
const original = stemToOriginal.get(stem) || stem;
|
|
1518
|
-
if (compoundWordSet.has(original) && !
|
|
1599
|
+
if (compoundWordSet.has(original) && !allTokens.includes(original)) {
|
|
1519
1600
|
return false;
|
|
1520
1601
|
}
|
|
1521
1602
|
return true;
|