paper-search-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +165 -0
- package/LICENSE +21 -0
- package/README-sc.md +642 -0
- package/README.md +642 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +637 -0
- package/dist/cli.js.map +1 -0
- package/dist/config/ConfigService.d.ts +26 -0
- package/dist/config/ConfigService.d.ts.map +1 -0
- package/dist/config/ConfigService.js +145 -0
- package/dist/config/ConfigService.js.map +1 -0
- package/dist/config/constants.d.ts +140 -0
- package/dist/config/constants.d.ts.map +1 -0
- package/dist/config/constants.js +93 -0
- package/dist/config/constants.js.map +1 -0
- package/dist/core/diagnostics.d.ts +43 -0
- package/dist/core/diagnostics.d.ts.map +1 -0
- package/dist/core/diagnostics.js +544 -0
- package/dist/core/diagnostics.js.map +1 -0
- package/dist/core/handleToolCall.d.ts +8 -0
- package/dist/core/handleToolCall.d.ts.map +1 -0
- package/dist/core/handleToolCall.js +440 -0
- package/dist/core/handleToolCall.js.map +1 -0
- package/dist/core/schemas.d.ts +454 -0
- package/dist/core/schemas.d.ts.map +1 -0
- package/dist/core/schemas.js +322 -0
- package/dist/core/schemas.js.map +1 -0
- package/dist/core/searchers.d.ts +45 -0
- package/dist/core/searchers.d.ts.map +1 -0
- package/dist/core/searchers.js +73 -0
- package/dist/core/searchers.js.map +1 -0
- package/dist/core/tools.d.ts +7 -0
- package/dist/core/tools.d.ts.map +1 -0
- package/dist/core/tools.js +640 -0
- package/dist/core/tools.js.map +1 -0
- package/dist/models/Paper.d.ts +64 -0
- package/dist/models/Paper.d.ts.map +1 -0
- package/dist/models/Paper.js +70 -0
- package/dist/models/Paper.js.map +1 -0
- package/dist/platforms/ArxivSearcher.d.ts +64 -0
- package/dist/platforms/ArxivSearcher.d.ts.map +1 -0
- package/dist/platforms/ArxivSearcher.js +531 -0
- package/dist/platforms/ArxivSearcher.js.map +1 -0
- package/dist/platforms/BioRxivSearcher.d.ts +47 -0
- package/dist/platforms/BioRxivSearcher.d.ts.map +1 -0
- package/dist/platforms/BioRxivSearcher.js +196 -0
- package/dist/platforms/BioRxivSearcher.js.map +1 -0
- package/dist/platforms/CORESearcher.d.ts +16 -0
- package/dist/platforms/CORESearcher.d.ts.map +1 -0
- package/dist/platforms/CORESearcher.js +148 -0
- package/dist/platforms/CORESearcher.js.map +1 -0
- package/dist/platforms/CrossrefSearcher.d.ts +34 -0
- package/dist/platforms/CrossrefSearcher.d.ts.map +1 -0
- package/dist/platforms/CrossrefSearcher.js +339 -0
- package/dist/platforms/CrossrefSearcher.js.map +1 -0
- package/dist/platforms/EuropePMCSearcher.d.ts +20 -0
- package/dist/platforms/EuropePMCSearcher.d.ts.map +1 -0
- package/dist/platforms/EuropePMCSearcher.js +173 -0
- package/dist/platforms/EuropePMCSearcher.js.map +1 -0
- package/dist/platforms/GoogleScholarSearcher.d.ts +77 -0
- package/dist/platforms/GoogleScholarSearcher.d.ts.map +1 -0
- package/dist/platforms/GoogleScholarSearcher.js +262 -0
- package/dist/platforms/GoogleScholarSearcher.js.map +1 -0
- package/dist/platforms/IACRSearcher.d.ts +51 -0
- package/dist/platforms/IACRSearcher.d.ts.map +1 -0
- package/dist/platforms/IACRSearcher.js +339 -0
- package/dist/platforms/IACRSearcher.js.map +1 -0
- package/dist/platforms/OpenAIRESearcher.d.ts +22 -0
- package/dist/platforms/OpenAIRESearcher.d.ts.map +1 -0
- package/dist/platforms/OpenAIRESearcher.js +223 -0
- package/dist/platforms/OpenAIRESearcher.js.map +1 -0
- package/dist/platforms/OpenAlexSearcher.d.ts +14 -0
- package/dist/platforms/OpenAlexSearcher.d.ts.map +1 -0
- package/dist/platforms/OpenAlexSearcher.js +114 -0
- package/dist/platforms/OpenAlexSearcher.js.map +1 -0
- package/dist/platforms/PMCSearcher.d.ts +20 -0
- package/dist/platforms/PMCSearcher.d.ts.map +1 -0
- package/dist/platforms/PMCSearcher.js +177 -0
- package/dist/platforms/PMCSearcher.js.map +1 -0
- package/dist/platforms/PaperSource.d.ts +143 -0
- package/dist/platforms/PaperSource.d.ts.map +1 -0
- package/dist/platforms/PaperSource.js +125 -0
- package/dist/platforms/PaperSource.js.map +1 -0
- package/dist/platforms/PubMedSearcher.d.ts +104 -0
- package/dist/platforms/PubMedSearcher.d.ts.map +1 -0
- package/dist/platforms/PubMedSearcher.js +422 -0
- package/dist/platforms/PubMedSearcher.js.map +1 -0
- package/dist/platforms/SciHubSearcher.d.ts +66 -0
- package/dist/platforms/SciHubSearcher.d.ts.map +1 -0
- package/dist/platforms/SciHubSearcher.js +398 -0
- package/dist/platforms/SciHubSearcher.js.map +1 -0
- package/dist/platforms/ScienceDirectSearcher.d.ts +42 -0
- package/dist/platforms/ScienceDirectSearcher.d.ts.map +1 -0
- package/dist/platforms/ScienceDirectSearcher.js +326 -0
- package/dist/platforms/ScienceDirectSearcher.js.map +1 -0
- package/dist/platforms/ScopusSearcher.d.ts +43 -0
- package/dist/platforms/ScopusSearcher.d.ts.map +1 -0
- package/dist/platforms/ScopusSearcher.js +364 -0
- package/dist/platforms/ScopusSearcher.js.map +1 -0
- package/dist/platforms/SemanticScholarSearcher.d.ts +96 -0
- package/dist/platforms/SemanticScholarSearcher.d.ts.map +1 -0
- package/dist/platforms/SemanticScholarSearcher.js +419 -0
- package/dist/platforms/SemanticScholarSearcher.js.map +1 -0
- package/dist/platforms/SpringerSearcher.d.ts +54 -0
- package/dist/platforms/SpringerSearcher.d.ts.map +1 -0
- package/dist/platforms/SpringerSearcher.js +407 -0
- package/dist/platforms/SpringerSearcher.js.map +1 -0
- package/dist/platforms/UnpaywallSearcher.d.ts +18 -0
- package/dist/platforms/UnpaywallSearcher.d.ts.map +1 -0
- package/dist/platforms/UnpaywallSearcher.js +115 -0
- package/dist/platforms/UnpaywallSearcher.js.map +1 -0
- package/dist/platforms/WebOfScienceSearcher.d.ts +111 -0
- package/dist/platforms/WebOfScienceSearcher.d.ts.map +1 -0
- package/dist/platforms/WebOfScienceSearcher.js +500 -0
- package/dist/platforms/WebOfScienceSearcher.js.map +1 -0
- package/dist/platforms/WileySearcher.d.ts +44 -0
- package/dist/platforms/WileySearcher.d.ts.map +1 -0
- package/dist/platforms/WileySearcher.js +148 -0
- package/dist/platforms/WileySearcher.js.map +1 -0
- package/dist/services/CitationService.d.ts +66 -0
- package/dist/services/CitationService.d.ts.map +1 -0
- package/dist/services/CitationService.js +237 -0
- package/dist/services/CitationService.js.map +1 -0
- package/dist/services/MultiSourceSearchService.d.ts +19 -0
- package/dist/services/MultiSourceSearchService.d.ts.map +1 -0
- package/dist/services/MultiSourceSearchService.js +96 -0
- package/dist/services/MultiSourceSearchService.js.map +1 -0
- package/dist/services/OpenAccessFallbackService.d.ts +20 -0
- package/dist/services/OpenAccessFallbackService.d.ts.map +1 -0
- package/dist/services/OpenAccessFallbackService.js +124 -0
- package/dist/services/OpenAccessFallbackService.js.map +1 -0
- package/dist/utils/ErrorHandler.d.ts +99 -0
- package/dist/utils/ErrorHandler.d.ts.map +1 -0
- package/dist/utils/ErrorHandler.js +266 -0
- package/dist/utils/ErrorHandler.js.map +1 -0
- package/dist/utils/Logger.d.ts +6 -0
- package/dist/utils/Logger.d.ts.map +1 -0
- package/dist/utils/Logger.js +26 -0
- package/dist/utils/Logger.js.map +1 -0
- package/dist/utils/PDFExtractor.d.ts +34 -0
- package/dist/utils/PDFExtractor.d.ts.map +1 -0
- package/dist/utils/PDFExtractor.js +130 -0
- package/dist/utils/PDFExtractor.js.map +1 -0
- package/dist/utils/PdfDownload.d.ts +7 -0
- package/dist/utils/PdfDownload.d.ts.map +1 -0
- package/dist/utils/PdfDownload.js +52 -0
- package/dist/utils/PdfDownload.js.map +1 -0
- package/dist/utils/QuotaManager.d.ts +32 -0
- package/dist/utils/QuotaManager.d.ts.map +1 -0
- package/dist/utils/QuotaManager.js +95 -0
- package/dist/utils/QuotaManager.js.map +1 -0
- package/dist/utils/RateLimiter.d.ts +50 -0
- package/dist/utils/RateLimiter.d.ts.map +1 -0
- package/dist/utils/RateLimiter.js +121 -0
- package/dist/utils/RateLimiter.js.map +1 -0
- package/dist/utils/RequestCache.d.ts +26 -0
- package/dist/utils/RequestCache.d.ts.map +1 -0
- package/dist/utils/RequestCache.js +66 -0
- package/dist/utils/RequestCache.js.map +1 -0
- package/dist/utils/SecurityUtils.d.ts +80 -0
- package/dist/utils/SecurityUtils.d.ts.map +1 -0
- package/dist/utils/SecurityUtils.js +357 -0
- package/dist/utils/SecurityUtils.js.map +1 -0
- package/package.json +111 -0
- package/skills/paper-search/SKILL.md +192 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"IACRSearcher.js","sourceRoot":"","sources":["../../src/platforms/IACRSearcher.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,EAAS,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAwD,MAAM,kBAAkB,CAAC;AACrG,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAClD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAC9C,OAAO,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAOxD,MAAM,OAAO,YAAa,SAAQ,WAAW;IAC1B,SAAS,CAAS;IAClB,UAAU,CAAW;IACrB,WAAW,CAAc;IAE1C;QACE,KAAK,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;QACzC,IAAI,CAAC,SAAS,GAAG,GAAG,IAAI,CAAC,OAAO,SAAS,CAAC;QAC1C,IAAI,CAAC,UAAU,GAAG;YAChB,8DAA8D;YAC9D,iDAAiD;YACjD,oDAAoD;SACrD,CAAC;QACF,oCAAoC;QACpC,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC;YACjC,iBAAiB,EAAE,CAAC;YACpB,aAAa,EAAE,CAAC;SACjB,CAAC,CAAC;IACL,CAAC;IAED,eAAe;QACb,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,IAAI;YACd,QAAQ,EAAE,IAAI;YACd,SAAS,EAAE,KAAK;YAChB,cAAc,EAAE,KAAK;YACrB,gBAAgB,EAAE,CAAC,YAAY,EAAE,cAAc,CAAC;SACjD,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAA6B,EAAE;QACzD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG;gBACb,CAAC,EAAE,KAAK;aACT,CAAC;YAEF,QAAQ,CAAC,yBAAyB,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;YACpD,QAAQ,CAAC,sBAAsB,EAAE,MAAM,CAAC,CAAC;YAEzC,MAAM,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,CAAC;YAE3C,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,gBAAgB,CAClD,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE;gBAC9B,MAAM;gBACN,OAAO,EAAE,QAAQ,CAAC,OAAO;gBACzB,OAAO,EAAE;oBACP,YAAY,EAAE,IAAI,CAAC,kBAAkB,EAAE;oBACvC,QAAQ,EAAE,iEAAiE;oBAC3E,iBAAiB,EAAE,gBAAgB;iBACpC;aACF,CAAC,EACF,EAAE,OAAO,EAAE,aAAa,EAAE,CAC3B,CAAC;YAEF,QAAQ,CAAC,sBAAsB,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;YAEzE,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACtE,QAAQ,CAAC,eAAe,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;YAEhD,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC;QACnD,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YA
CpB,QAAQ,CAAC,oBAAoB,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;YAC9C,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe,CAAC,OAAe;QACnC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,OAAO,IAAI,OAAO,EAAE,CAAC;YAErF,MAAM,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,CAAC;YAE3C,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,gBAAgB,CAClD,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE;gBACxB,OAAO,EAAE,QAAQ,CAAC,OAAO;gBACzB,OAAO,EAAE;oBACP,YAAY,EAAE,IAAI,CAAC,kBAAkB,EAAE;oBACvC,QAAQ,EAAE,iEAAiE;oBAC3E,iBAAiB,EAAE,gBAAgB;iBACpC;aACF,CAAC,EACF,EAAE,OAAO,EAAE,oBAAoB,EAAE,CAClC,CAAC;YAEF,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;gBAC5B,QAAQ,CAAC,uCAAuC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;gBACnE,OAAO,IAAI,CAAC;YACd,CAAC;YAED,OAAO,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC5D,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,QAAQ,CAAC,oCAAoC,OAAO,GAAG,EAAE,KAAK,CAAC,OAAO,CAAC,CAAC;YACxE,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CAAC,OAAe,EAAE,UAA2B,EAAE;QAC9D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,IAAI,OAAO,MAAM,CAAC;YAChD,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,aAAa,CAAC;YAEnD,WAAW;YACX,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC7B,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC9C,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC;YAC3D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAE/C,YAAY;YACZ,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;gBAClD,OAAO,QAAQ,CAAC;YAClB,CAAC;YAED,MAAM,IAAI,CAAC,WAAW,CAAC,iBAAiB,EAAE,CAAC;YAE3C,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,gBAAgB,CAClD,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE;gBACtB,YAAY,EAAE,QAAQ;gBACtB,OAAO,EAAE,QAAQ,CAAC,QAAQ;gBAC1B,OAAO,EAAE,EAAE,YAAY,EAAE,IAAI,CAAC,kBAAkB,EAAE,EAAE;aACrD,CAAC,EACF,EAAE,OAAO,EAAE,eAAe,EAAE,CAC7B,CAAC;YAEF,MAAM,MAAM,GAAG,EAAE,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;YAC9C,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAE3B,OAAO,
IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBACrC,MAAM,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;gBAC7C,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAC7B,CAAC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,cAAc,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,SAAS,CAAC,OAAe,EAAE,UAA2B,EAAE;QAC5D,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,aAAa,CAAC;YACnD,MAAM,QAAQ,GAAG,QAAQ,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,MAAM,CAAC;YAC3D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;YAE/C,eAAe;YACf,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC7B,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAC3C,CAAC;YAED,OAAO,2BAA2B,QAAQ,wEAAwE,CAAC;QACrH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,mBAAmB,CAAC,IAAY,EAAE,OAA0B;QACxE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAY,EAAE,CAAC;QAE3B,aAAa;QACb,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;YACjC,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC;gBAE5B,YAAY;gBACZ,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,KAAK,EAAE,CAAC;gBAC9D,IAAI,CAAC,SAAS,CAAC,MAAM;oBAAE,OAAO;gBAE9B,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAEvD,UAAU;gBACV,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE,CAAC;gBACzD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAEzE,SAAS;gBACT,MAAM,eAAe,GAAG,QAAQ,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;gBACvD,IAAI,WAAW,GAAgB,IAAI,CAAC;gBACpC,IAAI,eAAe,CAAC,MAAM,EAAE,CAAC;oBAC3B,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC5E,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;gBACzC,CAAC;gBAED,YAAY;gBACZ,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;gBAC7C,IAAI,CAAC,UAAU,CAA
C,MAAM;oBAAE,OAAO;gBAE/B,OAAO;gBACP,MAAM,SAAS,GAAG,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC;gBACpD,MAAM,KAAK,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAEtC,OAAO;gBACP,MAAM,WAAW,GAAG,UAAU,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC,KAAK,EAAE,CAAC;gBAC/D,MAAM,OAAO,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;oBAClC,WAAW,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAElE,OAAO;gBACP,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,KAAK,EAAE,CAAC;gBAC5D,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAE3E,OAAO;gBACP,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,KAAK,EAAE,CAAC;gBAClE,MAAM,QAAQ,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBAE5C,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC;oBAChC,OAAO,EAAE,OAAO;oBAChB,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC;oBAC5B,OAAO,EAAE,OAAO;oBAChB,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;oBAClC,GAAG,EAAE,EAAE;oBACP,aAAa,EAAE,WAAW,IAAI,IAAI,IAAI,EAAE;oBACxC,MAAM,EAAE,MAAM;oBACd,GAAG,EAAE,QAAQ;oBACb,MAAM,EAAE,MAAM;oBACd,WAAW,EAAE,WAAW,IAAI,SAAS;oBACrC,UAAU,EAAE,UAAU;oBACtB,QAAQ,EAAE,EAAE;oBACZ,aAAa,EAAE,CAAC;oBAChB,IAAI,EAAE,WAAW,EAAE,WAAW,EAAE;oBAChC,KAAK,EAAE;wBACL,MAAM,EAAE,OAAO;qBAChB;iBACF,CAAC,CAAC;gBAEH,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,QAAQ,CAAC,mCAAmC,EAAE,KAAK,CAAC,CAAC;YACvD,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,uBAAuB;QACvB,IAAI,OAAO,CAAC,YAAY,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,QAAQ,CAAC,kDAAkD,CAAC,CAAC;YAE7D,MAAM,cAAc,GAAY,EAAE,CAAC;YACnC,MAAM,WAAW,GAAG,CAAC,CAAC;YAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;gBACpD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;gBAC/C,MAAM,cAAc,GAAG,KAAK,CAAC,GAAG,CAAC,KAAK,EAAC,KAAK,EAAC,EAAE;oBAC7C,IAAI,CAAC;wBACH,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;wBAChE,OAAO,aAAa,IAAI,KAAK,CAAC,CAAC,YAAY;oBAC7C,CAAC;oBAAC,OAAO,KAA
K,EAAE,CAAC;wBACf,QAAQ,CAAC,8BAA8B,KAAK,CAAC,OAAO,GAAG,EAAE,KAAK,CAAC,CAAC;wBAChE,OAAO,KAAK,CAAC;oBACf,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,cAAc,CAAC,IAAI,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC;YAC5D,CAAC;YACD,OAAO,cAAc,CAAC;QACxB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,IAAY,EAAE,OAAe;QACzD,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAE7B,OAAO;YACP,MAAM,KAAK,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAEzC,OAAO;YACP,MAAM,UAAU,GAAG,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACnD,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC;gBAC1B,UAAU,CAAC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEjF,OAAO;YACP,MAAM,QAAQ,GAAG,CAAC,CAAC,mCAAmC,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAEtE,QAAQ;YACR,MAAM,QAAQ,GAAa,EAAE,CAAC;YAC9B,CAAC,CAAC,8BAA8B,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE;gBACxD,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC;YAC1C,CAAC,CAAC,CAAC;YAEH,cAAc;YACd,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC;YAEjF,IAAI,eAAe,GAAG,EAAE,CAAC;YACzB,IAAI,cAAc,GAAa,EAAE,CAAC;YAClC,IAAI,WAAW,GAAgB,IAAI,CAAC;YAEpC,SAAS;YACT,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;oBAClE,eAAe,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;oBAC/B,MAAM;gBACR,CAAC;YACH,CAAC;YAED,SAAS;YACT,IAAI,YAAY,GAAG,KAAK,CAAC;YACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACtB,IAAI,IAAI,KAAK,SAAS,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBAC9C,YAAY,GAAG,IAAI,CAAC;oBACpB,SAAS;gBACX,CAAC;qBAAM,IAAI,YAAY,IAAI,IAAI,CAAC,QA
AQ,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;oBAC/E,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBAC1B,sBAAsB;oBACtB,IAAI,CAAC,WAAW,EAAE,CAAC;wBACjB,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;wBAC1C,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;oBACxC,CAAC;gBACH,CAAC;qBAAM,IAAI,YAAY,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;oBACxF,MAAM;gBACR,CAAC;YACH,CAAC;YAED,YAAY;YACZ,MAAM,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,IAAI,OAAO,MAAM,CAAC;YAChD,MAAM,QAAQ,GAAG,GAAG,IAAI,CAAC,OAAO,IAAI,OAAO,EAAE,CAAC;YAE9C,sBAAsB;YACtB,MAAM,aAAa,GAAG,WAAW,IAAI,IAAI,IAAI,EAAE,CAAC;YAEhD,OAAO,YAAY,CAAC,MAAM,CAAC;gBACzB,OAAO,EAAE,OAAO;gBAChB,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC;gBAC5B,OAAO,EAAE,OAAO;gBAChB,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;gBAClC,GAAG,EAAE,EAAE;gBACP,aAAa,EAAE,aAAa;gBAC5B,MAAM,EAAE,MAAM;gBACd,GAAG,EAAE,QAAQ;gBACb,MAAM,EAAE,MAAM;gBACd,WAAW,EAAE,WAAW,IAAI,SAAS;gBACrC,UAAU,EAAE,EAAE;gBACd,QAAQ,EAAE,QAAQ;gBAClB,aAAa,EAAE,CAAC;gBAChB,IAAI,EAAE,aAAa,CAAC,WAAW,EAAE;gBACjC,KAAK,EAAE;oBACL,MAAM,EAAE,OAAO;oBACf,eAAe,EAAE,eAAe;oBAChC,OAAO,EAAE,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC;iBACnC;aACF,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,QAAQ,CAAC,mCAAmC,EAAE,KAAK,CAAC,CAAC;YACrD,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACK,kBAAkB;QACxB,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;IAC7E,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,EAAU;QACtB,OAAO,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;IACzD,CAAC;CACF"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { Paper } from '../models/Paper.js';
|
|
2
|
+
import { PaperSource, SearchOptions, DownloadOptions, PlatformCapabilities } from './PaperSource.js';
|
|
3
|
+
/**
 * Type declarations for the OpenAIRE paper source.
 * The implementation supports search only; downloadPdf and readPaper always reject.
 */
export declare class OpenAIRESearcher extends PaperSource {
|
|
4
|
+
/** HTTP client created in the constructor (an axios instance in the implementation). */
private readonly client;
|
|
5
|
+
/** @param apiKey Optional key; the implementation falls back to PAPER_SEARCH_OPENAIRE_API_KEY, then OPENAIRE_API_KEY. */
constructor(apiKey?: string);
|
|
6
|
+
/** Reports the capability flags of this source (search only, no API key required). */
getCapabilities(): PlatformCapabilities;
|
|
7
|
+
/** Searches OpenAIRE by keyword; the implementation honours options.maxResults and options.year. */
search(query: string, options?: SearchOptions): Promise<Paper[]>;
|
|
8
|
+
/** Always rejects: direct downloads are not provided by this source. */
downloadPdf(_paperId: string, _options?: DownloadOptions): Promise<string>;
|
|
9
|
+
/** Always rejects: full-text reading is not supported by this source. */
readPaper(_paperId: string, _options?: DownloadOptions): Promise<string>;
|
|
10
|
+
/** Fetches the raw XML response text for a query, with retries. */
private searchXml;
|
|
11
|
+
/** Converts one parsed result node into a Paper, or null when no title is found. */
private parseResult;
|
|
12
|
+
/** Collects every object in a parsed tree that sits under a given key (case-insensitive). */
private collectNodes;
|
|
13
|
+
/** Collects the distinct cleaned strings of a parsed tree, skipping attribute maps. */
private flattenStrings;
|
|
14
|
+
/** Picks the first value containing one of the hints, else the first value, else ''. */
private first;
|
|
15
|
+
/** Collects the distinct text contents of all nodes stored under any of the given keys. */
private textsForKeys;
|
|
16
|
+
/** First entry of textsForKeys, or '' when there is none. */
private firstTextForKeys;
|
|
17
|
+
/** Extracts the cleaned text of a string or of an object's `_` member. */
private nodeText;
|
|
18
|
+
/** Heuristic abstract: first non-URL string longer than 120 characters. */
private collectLikelyAbstract;
|
|
19
|
+
/** Extracts the first DOI-like substring, or ''. */
private extractDoi;
|
|
20
|
+
/** 32-bit string hash used to build fallback paper ids. */
private hash;
|
|
21
|
+
}
|
|
22
|
+
//# sourceMappingURL=OpenAIRESearcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"OpenAIRESearcher.d.ts","sourceRoot":"","sources":["../../src/platforms/OpenAIRESearcher.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,EAAgB,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAGrG,qBAAa,gBAAiB,SAAQ,WAAW;IAC/C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,MAAM,CAAC,EAAE,MAAM;IAgB3B,eAAe,IAAI,oBAAoB;IAWjC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IA2BpE,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAI9E,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;YAIpE,SAAS;IA+BvB,OAAO,CAAC,WAAW;IAiCnB,OAAO,CAAC,YAAY;IAiBpB,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,KAAK;IAKb,OAAO,CAAC,YAAY;IAsBpB,OAAO,CAAC,gBAAgB;IAIxB,OAAO,CAAC,QAAQ;IAOhB,OAAO,CAAC,qBAAqB;IAI7B,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,IAAI;CAKb"}
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import * as xml2js from 'xml2js';
|
|
3
|
+
import { PaperFactory } from '../models/Paper.js';
|
|
4
|
+
import { PaperSource } from './PaperSource.js';
|
|
5
|
+
import { TIMEOUTS, USER_AGENT } from '../config/constants.js';
|
|
6
|
+
/**
 * Paper source backed by the OpenAIRE search API (`/search/researchProducts`).
 *
 * Only discovery is implemented: `search` returns parsed papers, while
 * `downloadPdf` and `readPaper` always throw. `cleanText`, `parseDate`,
 * `handleHttpError` and `baseUrl` are not defined in this class; they are
 * presumably inherited from `PaperSource` (verify against the base class).
 */
export class OpenAIRESearcher extends PaperSource {
    /** Axios instance preconfigured with base URL, timeout and default headers. */
    client;

    /**
     * @param {string} [apiKey] Optional API key. Falls back to the
     *   PAPER_SEARCH_OPENAIRE_API_KEY and then OPENAIRE_API_KEY environment
     *   variables; when none is set, no Authorization header is sent.
     */
    constructor(apiKey) {
        const configuredKey = apiKey || process.env.PAPER_SEARCH_OPENAIRE_API_KEY || process.env.OPENAIRE_API_KEY || '';
        super('openaire', 'https://api.openaire.eu', configuredKey);
        this.client = axios.create({
            baseURL: this.baseUrl,
            timeout: TIMEOUTS.DEFAULT,
            headers: {
                Accept: 'application/xml,text/xml;q=0.9,*/*;q=0.8',
                'User-Agent': USER_AGENT,
                ...(configuredKey ? { Authorization: `Bearer ${configuredKey}` } : {})
            },
            responseType: 'text',
            // Let 4xx and the listed 5xx responses resolve instead of throwing so
            // that searchXml can inspect the status and decide whether to retry.
            validateStatus: status => status < 500 || [500, 502, 503, 504].includes(status)
        });
    }

    /**
     * @returns {object} Capability flags: search only, no API key required,
     *   `maxResults` and `year` options supported.
     */
    getCapabilities() {
        return {
            search: true,
            download: false,
            fullText: false,
            citations: false,
            requiresApiKey: false,
            supportedOptions: ['maxResults', 'year']
        };
    }

    /**
     * Searches OpenAIRE by keyword and parses the XML response into papers.
     *
     * @param {string} query Keyword query.
     * @param {object} [options] Search options; `maxResults` (default 10, request
     *   size capped at 100) and `year` (exact-match filter applied client-side).
     * @returns {Promise<object[]>} Parsed papers, at most `maxResults` of them.
     */
    async search(query, options = {}) {
        try {
            const limit = options.maxResults || 10;
            const xml = await this.searchXml(query, Math.min(limit, 100));
            const parsed = await xml2js.parseStringPromise(xml, {
                explicitArray: false,
                explicitCharkey: true,
                attrkey: '$',
                charkey: '_',
                mergeAttrs: false
            });
            const resultNodes = this.collectNodes(parsed, 'result');
            const papers = [];
            for (const node of resultNodes) {
                const paper = this.parseResult(node);
                if (!paper)
                    continue;
                if (options.year && paper.year !== Number(options.year))
                    continue;
                papers.push(paper);
                if (papers.length >= limit)
                    break;
            }
            return papers;
        }
        catch (error) {
            // NOTE(review): handleHttpError is expected to throw; if it ever
            // returns, this method resolves to undefined — confirm in PaperSource.
            this.handleHttpError(error, 'search');
        }
    }

    /**
     * Not supported by this source.
     * @throws {Error} Always.
     */
    async downloadPdf(_paperId, _options = {}) {
        throw new Error('OpenAIRE does not provide direct downloads in this CLI. Use pdf_url with download_with_fallback.');
    }

    /**
     * Not supported by this source.
     * @throws {Error} Always.
     */
    async readPaper(_paperId, _options = {}) {
        throw new Error('OpenAIRE is a discovery source; direct full-text reading is not supported.');
    }

    /**
     * Fetches the raw XML for a keyword query, trying several request profiles
     * (page index / User-Agent combinations) with up to three attempts each.
     *
     * - Retryable statuses (403, 429, 500, 502, 503, 504) back off exponentially
     *   (1s, 2s, 4s, capped at 8s) and retry the same profile.
     * - Any other status >= 400 is not retried with the same profile, because
     *   repeating an identical request cannot succeed; the next profile is tried.
     * - Transport errors thrown by axios are retried immediately.
     *
     * @param {string} query Keyword query.
     * @param {number} maxResults Page size sent as the `size` parameter.
     * @returns {Promise<string>} Response body as text ('' when empty).
     * @throws {Error} The last error observed once every profile is exhausted.
     */
    async searchXml(query, maxResults) {
        const profiles = [
            { page: 1, userAgent: USER_AGENT },
            { page: 0, userAgent: USER_AGENT },
            { page: 1, userAgent: 'Mozilla/5.0' }
        ];
        const retryableStatuses = new Set([403, 429, 500, 502, 503, 504]);
        const maxAttempts = 3;
        let lastError;
        for (const [profileIndex, profile] of profiles.entries()) {
            for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
                let response;
                try {
                    response = await this.client.get('/search/researchProducts', {
                        params: { keywords: query, page: profile.page, size: maxResults },
                        headers: { 'User-Agent': profile.userAgent }
                    });
                }
                catch (error) {
                    lastError = error;
                    continue;
                }
                if (retryableStatuses.has(response.status)) {
                    // Remember the status so an exhausted retry budget reports it.
                    lastError = new Error(`OpenAIRE request failed with HTTP ${response.status}`);
                    const isFinalAttempt = profileIndex === profiles.length - 1 && attempt === maxAttempts - 1;
                    if (!isFinalAttempt) {
                        await new Promise(resolve => setTimeout(resolve, Math.min(8000, 1000 * 2 ** attempt)));
                    }
                    continue;
                }
                if (response.status >= 400) {
                    lastError = new Error(`OpenAIRE request failed with HTTP ${response.status}`);
                    break;
                }
                return String(response.data || '');
            }
        }
        throw lastError instanceof Error ? lastError : new Error('OpenAIRE request failed');
    }

    /**
     * Converts one parsed `result` node into a paper.
     *
     * @param {object} node Parsed XML node (xml2js object form).
     * @returns {object|null} Paper built by `PaperFactory.create`, or null when
     *   no title can be determined.
     */
    parseResult(node) {
        const flat = this.flattenStrings(node);
        const title = this.firstTextForKeys(node, ['title', 'maintitle']) || this.first(flat, []);
        if (!title)
            return null;
        const doi = this.extractDoi(flat.join(' '));
        const urls = flat.filter(value => /^https?:\/\//i.test(value));
        const pdfUrl = urls.find(url => url.toLowerCase().includes('pdf')) || '';
        // Matches a bare year (19xx/20xx) or a full YYYY-MM-DD date.
        const datePattern = /\b(19|20)\d{2}(-\d{2}-\d{2})?\b/;
        const dateText = this.firstTextForKeys(node, ['publicationdate', 'dateofacceptance', 'date']) ||
            flat.find(value => datePattern.test(value)) ||
            '';
        const matchedDate = dateText.match(datePattern)?.[0] || '';
        const publishedDate = dateText ? this.parseDate(matchedDate) : null;
        // Take the year from the matched text rather than Date#getFullYear():
        // getFullYear() is local-time, so a date parsed as UTC midnight on
        // January 1st would report the previous year in negative-offset zones.
        const year = publishedDate && matchedDate
            ? Number(matchedDate.slice(0, 4))
            : publishedDate?.getFullYear();
        const objId = this.firstTextForKeys(node, ['objidentifier', 'originalid', 'pid']) || `openaire_${Math.abs(this.hash(title))}`;
        return PaperFactory.create({
            paperId: objId,
            title: this.cleanText(title),
            authors: this.textsForKeys(node, ['creator', 'person']).slice(0, 20),
            abstract: this.firstTextForKeys(node, ['description', 'abstract']) || this.collectLikelyAbstract(flat),
            doi,
            publishedDate,
            pdfUrl,
            url: urls[0] || (doi ? `https://doi.org/${doi}` : ''),
            source: 'openaire',
            year,
            extra: {
                // NOTE(review): this also matches any text mentioning "openaire",
                // which may over-report open access — confirm intent.
                openAccess: flat.some(value => /open access|openaire/i.test(value)),
                sourceHint: 'openaire'
            }
        });
    }

    /**
     * Collects every object found under a key named `keyName` (case-insensitive)
     * anywhere in the tree. Array values contribute each of their items.
     *
     * @param {*} value Root of the parsed tree.
     * @param {string} keyName Key to look for.
     * @returns {object[]} Matching nodes in traversal order.
     */
    collectNodes(value, keyName) {
        const out = [];
        const walk = (node, key = '') => {
            if (!node || typeof node !== 'object')
                return;
            if (key.toLowerCase() === keyName.toLowerCase())
                out.push(node);
            for (const [childKey, childValue] of Object.entries(node)) {
                if (Array.isArray(childValue)) {
                    childValue.forEach(item => walk(item, childKey));
                }
                else {
                    walk(childValue, childKey);
                }
            }
        };
        walk(value);
        return out;
    }

    /**
     * Collects the distinct, cleaned, non-empty strings of a tree in traversal
     * order. Attribute maps (key `$`) are skipped.
     *
     * @param {*} value Root of the parsed tree.
     * @returns {string[]} De-duplicated strings.
     */
    flattenStrings(value) {
        const out = [];
        const walk = (node) => {
            if (typeof node === 'string') {
                const cleaned = this.cleanText(node);
                if (cleaned)
                    out.push(cleaned);
                return;
            }
            if (!node || typeof node !== 'object')
                return;
            for (const [childKey, childValue] of Object.entries(node)) {
                if (childKey === '$')
                    continue;
                if (Array.isArray(childValue))
                    childValue.forEach(walk);
                else
                    walk(childValue);
            }
        };
        walk(value);
        return [...new Set(out)];
    }

    /**
     * Picks a value from a list.
     *
     * @param {string[]} values Candidates.
     * @param {string[]} hints Substrings, compared against the lower-cased value.
     * @returns {string} First value containing a hint, else the first value, else ''.
     */
    first(values, hints) {
        if (hints.length === 0)
            return values[0] || '';
        return values.find(value => hints.some(hint => value.toLowerCase().includes(hint))) || values[0] || '';
    }

    /**
     * Collects the distinct text contents of all object nodes stored under any
     * of the given keys (case-insensitive). Attribute maps (key `$`) are skipped.
     *
     * @param {*} value Root of the parsed tree.
     * @param {string[]} keys Key names to match.
     * @returns {string[]} De-duplicated texts in traversal order.
     */
    textsForKeys(value, keys) {
        const wanted = new Set(keys.map(key => key.toLowerCase()));
        const out = [];
        const walk = (node, key = '') => {
            if (!node || typeof node !== 'object')
                return;
            const normalizedKey = key.toLowerCase();
            if (wanted.has(normalizedKey)) {
                const text = this.nodeText(node);
                if (text)
                    out.push(text);
            }
            for (const [childKey, childValue] of Object.entries(node)) {
                if (childKey === '$')
                    continue;
                if (Array.isArray(childValue))
                    childValue.forEach(item => walk(item, childKey));
                else
                    walk(childValue, childKey);
            }
        };
        walk(value);
        return [...new Set(out)];
    }

    /**
     * @param {*} value Root of the parsed tree.
     * @param {string[]} keys Key names to match.
     * @returns {string} First text found by `textsForKeys`, or ''.
     */
    firstTextForKeys(value, keys) {
        return this.textsForKeys(value, keys)[0] || '';
    }

    /**
     * @param {*} value A string, or an object whose `_` member holds its text.
     * @returns {string} Cleaned text, or '' when none is available.
     */
    nodeText(value) {
        if (typeof value === 'string')
            return this.cleanText(value);
        if (!value || typeof value !== 'object')
            return '';
        if (typeof value._ === 'string')
            return this.cleanText(value._);
        return '';
    }

    /**
     * Heuristic abstract fallback.
     *
     * @param {string[]} values Flattened strings of a result.
     * @returns {string} First string longer than 120 characters that does not
     *   start with 'http', or ''.
     */
    collectLikelyAbstract(values) {
        return values.find(value => value.length > 120 && !value.startsWith('http')) || '';
    }

    /**
     * @param {string} value Text to scan.
     * @returns {string} First DOI-like substring (`10.<4-9 digits>/<suffix>`), or ''.
     */
    extractDoi(value) {
        return value.match(/10\.\d{4,9}\/[-._;()/:A-Z0-9]+/i)?.[0] || '';
    }

    /**
     * 31-multiplier string hash truncated to a signed 32-bit integer.
     *
     * @param {string} value Input string.
     * @returns {number} Signed 32-bit hash (0 for the empty string).
     */
    hash(value) {
        let hash = 0;
        for (let i = 0; i < value.length; i += 1)
            hash = ((hash << 5) - hash + value.charCodeAt(i)) | 0;
        return hash;
    }
}
|
|
223
|
+
//# sourceMappingURL=OpenAIRESearcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"OpenAIRESearcher.js","sourceRoot":"","sources":["../../src/platforms/OpenAIRESearcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAwB,MAAM,OAAO,CAAC;AAC7C,OAAO,KAAK,MAAM,MAAM,QAAQ,CAAC;AACjC,OAAO,EAAS,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAwD,MAAM,kBAAkB,CAAC;AACrG,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAE9D,MAAM,OAAO,gBAAiB,SAAQ,WAAW;IAC9B,MAAM,CAAgB;IAEvC,YAAY,MAAe;QACzB,MAAM,aAAa,GAAG,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,6BAA6B,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,EAAE,CAAC;QAChH,KAAK,CAAC,UAAU,EAAE,yBAAyB,EAAE,aAAa,CAAC,CAAC;QAC5D,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,OAAO,EAAE;gBACP,MAAM,EAAE,0CAA0C;gBAClD,YAAY,EAAE,UAAU;gBACxB,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,UAAU,aAAa,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACvE;YACD,YAAY,EAAE,MAAM;YACpB,cAAc,EAAE,MAAM,CAAC,EAAE,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC;SAChF,CAAC,CAAC;IACL,CAAC;IAED,eAAe;QACb,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,KAAK;YACf,QAAQ,EAAE,KAAK;YACf,SAAS,EAAE,KAAK;YAChB,cAAc,EAAE,KAAK;YACrB,gBAAgB,EAAE,CAAC,YAAY,EAAE,MAAM,CAAC;SACzC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QACrD,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,EAAE,GAAG,CAAC,CAAC,CAAC;YACjF,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,GAAG,EAAE;gBAClD,aAAa,EAAE,KAAK;gBACpB,eAAe,EAAE,IAAI;gBACrB,OAAO,EAAE,GAAG;gBACZ,OAAO,EAAE,GAAG;gBACZ,UAAU,EAAE,KAAK;aAClB,CAAC,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;YACxD,MAAM,MAAM,GAAY,EAAE,CAAC;YAE3B,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;gBAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;gBACrC,IAAI,CAAC,KAAK;oBAAE,SAAS;gBACrB,IAAI,OAAO,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC;oBAAE,SAAS;gBAClE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACnB,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC;oBAAE,M
AAM;YACzD,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,QAAgB,EAAE,WAA4B,EAAE;QAChE,MAAM,IAAI,KAAK,CAAC,kGAAkG,CAAC,CAAC;IACtH,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,QAAgB,EAAE,WAA4B,EAAE;QAC9D,MAAM,IAAI,KAAK,CAAC,4EAA4E,CAAC,CAAC;IAChG,CAAC;IAEO,KAAK,CAAC,SAAS,CAAC,KAAa,EAAE,UAAkB;QACvD,MAAM,QAAQ,GAAG;YACf,EAAE,IAAI,EAAE,CAAC,EAAE,SAAS,EAAE,UAAU,EAAE;YAClC,EAAE,IAAI,EAAE,CAAC,EAAE,SAAS,EAAE,UAAU,EAAE;YAClC,EAAE,IAAI,EAAE,CAAC,EAAE,SAAS,EAAE,aAAa,EAAE;SACtC,CAAC;QAEF,IAAI,SAAkB,CAAC;QACvB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC;gBAChD,IAAI,CAAC;oBACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,0BAA0B,EAAE;wBACjE,MAAM,EAAE,EAAE,QAAQ,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE;wBACjE,OAAO,EAAE,EAAE,YAAY,EAAE,OAAO,CAAC,SAAS,EAAE;qBAC7C,CAAC,CAAC;oBACH,IAAI,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;wBAC7D,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,IAAI,GAAG,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC;wBACvF,SAAS;oBACX,CAAC;oBACD,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;wBAC3B,MAAM,IAAI,KAAK,CAAC,qCAAqC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;oBAC1E,CAAC;oBACD,OAAO,MAAM,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;gBACrC,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,SAAS,GAAG,KAAK,CAAC;gBACpB,CAAC;YACH,CAAC;QACH,CAAC;QACD,MAAM,SAAS,YAAY,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;IACtF,CAAC;IAEO,WAAW,CAAC,IAAS;QAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC1F,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,GAAG,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QAC5C,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CA
AC,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QACzE,MAAM,QAAQ,GACZ,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,CAAC,CAAC;YAC5E,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,iCAAiC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjE,EAAE,CAAC;QACL,MAAM,aAAa,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,KAAK,CAAC,iCAAiC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACrH,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC,eAAe,EAAE,YAAY,EAAE,KAAK,CAAC,CAAC,IAAI,YAAY,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;QAE9H,OAAO,YAAY,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,KAAK;YACd,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC;YAC5B,OAAO,EAAE,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;YACpE,QAAQ,EAAE,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC,aAAa,EAAE,UAAU,CAAC,CAAC,IAAI,IAAI,CAAC,qBAAqB,CAAC,IAAI,CAAC;YACtG,GAAG;YACH,aAAa;YACb,MAAM;YACN,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACrD,MAAM,EAAE,UAAU;YAClB,IAAI,EAAE,aAAa,EAAE,WAAW,EAAE;YAClC,KAAK,EAAE;gBACL,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,uBAAuB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACnE,UAAU,EAAE,UAAU;aACvB;SACF,CAAC,CAAC;IACL,CAAC;IAEO,YAAY,CAAC,KAAU,EAAE,OAAe;QAC9C,MAAM,GAAG,GAAU,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,CAAC,IAAS,EAAE,GAAG,GAAG,EAAE,EAAE,EAAE;YACnC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;gBAAE,OAAO;YAC9C,IAAI,GAAG,CAAC,WAAW,EAAE,KAAK,OAAO,CAAC,WAAW,EAAE;gBAAE,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAChE,KAAK,MAAM,CAAC,QAAQ,EAAE,UAAU,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1D,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;oBAC9B,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC;gBACnD,CAAC;qBAAM,CAAC;oBACN,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;gBAC7B,CAAC;YACH,CAAC;QACH,CAAC,CAAC;QACF,IAAI,CAAC,KAAK,CAA
C,CAAC;QACZ,OAAO,GAAG,CAAC;IACb,CAAC;IAEO,cAAc,CAAC,KAAU;QAC/B,MAAM,GAAG,GAAa,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,CAAC,IAAS,EAAE,EAAE;YACzB,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;gBACrC,IAAI,OAAO;oBAAE,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAC/B,OAAO;YACT,CAAC;YACD,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;gBAAE,OAAO;YAC9C,KAAK,MAAM,CAAC,QAAQ,EAAE,UAAU,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1D,IAAI,QAAQ,KAAK,GAAG;oBAAE,SAAS;gBAC/B,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC;oBAAE,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;;oBACnD,IAAI,CAAC,UAAU,CAAC,CAAC;YACxB,CAAC;QACH,CAAC,CAAC;QACF,IAAI,CAAC,KAAK,CAAC,CAAC;QACZ,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3B,CAAC;IAEO,KAAK,CAAC,MAAgB,EAAE,KAAe;QAC7C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,MAAM,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC/C,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACzG,CAAC;IAEO,YAAY,CAAC,KAAU,EAAE,IAAc;QAC7C,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAC3D,MAAM,GAAG,GAAa,EAAE,CAAC;QAEzB,MAAM,IAAI,GAAG,CAAC,IAAS,EAAE,GAAG,GAAG,EAAE,EAAE,EAAE;YACnC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;gBAAE,OAAO;YAC9C,MAAM,aAAa,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;YACxC,IAAI,MAAM,CAAC,GAAG,CAAC,aAAa,CAAC,EAAE,CAAC;gBAC9B,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;gBACjC,IAAI,IAAI;oBAAE,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC3B,CAAC;YACD,KAAK,MAAM,CAAC,QAAQ,EAAE,UAAU,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1D,IAAI,QAAQ,KAAK,GAAG;oBAAE,SAAS;gBAC/B,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC;oBAAE,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC;;oBAC3E,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;YAClC,CAAC;QACH,CAAC,CAAC;QAEF,IAAI,CAAC,KAAK,CAAC,CAAC;QACZ,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3B,CAAC;IAEO,gBAAgB,CAAC,KAAU,EAAE,IAAc;QACjD
,OAAO,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACjD,CAAC;IAEO,QAAQ,CAAC,KAAU;QACzB,IAAI,OAAO,KAAK,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC5D,IAAI,CAAC,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ;YAAE,OAAO,EAAE,CAAC;QACnD,IAAI,OAAO,KAAK,CAAC,CAAC,KAAK,QAAQ;YAAE,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAChE,OAAO,EAAE,CAAC;IACZ,CAAC;IAEO,qBAAqB,CAAC,MAAgB;QAC5C,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IACrF,CAAC;IAEO,UAAU,CAAC,KAAa;QAC9B,OAAO,KAAK,CAAC,KAAK,CAAC,iCAAiC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACnE,CAAC;IAEO,IAAI,CAAC,KAAa;QACxB,IAAI,IAAI,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC;YAAE,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAChG,OAAO,IAAI,CAAC;IACd,CAAC;CACF"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { Paper } from '../models/Paper.js';
|
|
2
|
+
import { PaperSource, SearchOptions, DownloadOptions, PlatformCapabilities } from './PaperSource.js';
|
|
3
|
+
/**
 * Paper source backed by the OpenAlex works API.
 * Advertises search and citation counts; PDF download and full-text reading are
 * not supported and the corresponding methods always reject.
 */
export declare class OpenAlexSearcher extends PaperSource {
|
|
4
|
+
/** HTTP client created in the constructor with the OpenAlex works endpoint as its base URL. */
private readonly client;
|
|
5
|
+
/** Takes no arguments; the endpoint, timeout and headers are fixed by the implementation. */
constructor();
|
|
6
|
+
/** Describes which operations this source supports and which search options it honours. */
getCapabilities(): PlatformCapabilities;
|
|
7
|
+
/**
 * Searches OpenAlex works for `query`.
 * `options.maxResults` defaults to 10 and is capped at 200; `options.year` may restrict
 * results to a publication year.
 */
search(query: string, options?: SearchOptions): Promise<Paper[]>;
|
|
8
|
+
/** Always rejects: OpenAlex does not host PDFs. Arguments are ignored. */
downloadPdf(_paperId: string, _options?: DownloadOptions): Promise<string>;
|
|
9
|
+
/** Always rejects: OpenAlex does not provide full text. Arguments are ignored. */
readPaper(_paperId: string, _options?: DownloadOptions): Promise<string>;
|
|
10
|
+
/**
 * Looks up a single work by DOI (a bare DOI or a doi.org URL).
 * Resolves to `null` when the DOI is empty after cleaning or OpenAlex answers 404.
 */
getPaperByDoi(doi: string): Promise<Paper | null>;
|
|
11
|
+
/** Converts one OpenAlex work record into a Paper; yields null when the record has no title. */
private parseWork;
|
|
12
|
+
/** Rebuilds abstract text from OpenAlex's `abstract_inverted_index` (word -> positions) map. */
private reconstructAbstract;
|
|
13
|
+
}
|
|
14
|
+
//# sourceMappingURL=OpenAlexSearcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"OpenAlexSearcher.d.ts","sourceRoot":"","sources":["../../src/platforms/OpenAlexSearcher.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAgB,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAwBrG,qBAAa,gBAAiB,SAAQ,WAAW;IAC/C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;;IAcvC,eAAe,IAAI,oBAAoB;IAWjC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAsBpE,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAI9E,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,QAAQ,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAI5E,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC;IAgBvD,OAAO,CAAC,SAAS;IAkCjB,OAAO,CAAC,mBAAmB;CAU5B"}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { PaperFactory } from '../models/Paper.js';
|
|
3
|
+
import { PaperSource } from './PaperSource.js';
|
|
4
|
+
import { TIMEOUTS, USER_AGENT } from '../config/constants.js';
|
|
5
|
+
/**
 * Paper source backed by the OpenAlex works API (metadata and open-access links only).
 *
 * Supports keyword search and DOI lookup. PDF download and full-text reading are
 * not available from OpenAlex, so those methods always throw.
 */
export class OpenAlexSearcher extends PaperSource {
    /** Axios instance whose base URL is the OpenAlex works endpoint. */
    client;

    constructor() {
        super('openalex', 'https://api.openalex.org/works');
        this.client = axios.create({
            baseURL: this.baseUrl,
            timeout: TIMEOUTS.DEFAULT,
            headers: {
                Accept: 'application/json',
                // A contact address is embedded in the User-Agent; the Crossref
                // mailto setting is reused rather than introducing a separate variable.
                'User-Agent': `${USER_AGENT} (mailto:${process.env.CROSSREF_MAILTO || 'paper-search-cli@example.com'})`
            }
        });
    }

    /**
     * Describes what this source can do.
     * @returns {object} Capability flags plus the search options that are honoured.
     */
    getCapabilities() {
        return {
            search: true,
            download: false,
            fullText: false,
            citations: true,
            requiresApiKey: false,
            supportedOptions: ['maxResults', 'year']
        };
    }

    /**
     * Searches OpenAlex works.
     *
     * @param {string} query - Free-text search string.
     * @param {object} [options]
     * @param {number} [options.maxResults=10] - Page size; clamped to the range 1..200.
     * @param {string|number} [options.year] - `YYYY`, `YYYY-YYYY`, `YYYY-` or `-YYYY`.
     *   Any other shape is ignored (no date filter is sent).
     * @returns {Promise<Array>} Parsed papers; records without a title are dropped.
     */
    async search(query, options = {}) {
        try {
            const requested = Math.trunc(Number(options.maxResults) || 10);
            const params = {
                search: query,
                per_page: Math.min(Math.max(requested, 1), 200)
            };

            // Stringify first so a numeric year (e.g. 2021) does not crash on `.match`.
            const yearText = options.year ? String(options.year).trim() : '';
            const single = /^\d{4}$/.exec(yearText);
            const range = /^(\d{4})?\s*-\s*(\d{4})?$/.exec(yearText);
            const fromYear = single ? single[0] : range?.[1];
            const toYear = single ? single[0] : range?.[2];
            const filters = [];
            if (fromYear) {
                filters.push(`from_publication_date:${fromYear}-01-01`);
            }
            if (toYear) {
                filters.push(`to_publication_date:${toYear}-12-31`);
            }
            if (filters.length > 0) {
                params.filter = filters.join(',');
            }

            const response = await this.client.get('', { params });
            const results = Array.isArray(response.data?.results) ? response.data.results : [];
            return results.map((item) => this.parseWork(item)).filter(Boolean);
        }
        catch (error) {
            // NOTE(review): handleHttpError is inherited; presumably it rethrows — confirm.
            this.handleHttpError(error, 'search');
        }
    }

    /**
     * Not supported by this source.
     * @throws {Error} Always.
     */
    async downloadPdf(_paperId, _options = {}) {
        throw new Error('OpenAlex does not host PDFs directly. Use pdf_url or download_with_fallback.');
    }

    /**
     * Not supported by this source.
     * @throws {Error} Always.
     */
    async readPaper(_paperId, _options = {}) {
        throw new Error('OpenAlex provides metadata and OA links only; it does not provide direct full text.');
    }

    /**
     * Fetches a single work by DOI.
     *
     * @param {string} doi - Bare DOI, `doi:`-prefixed DOI, or a doi.org / dx.doi.org URL.
     * @returns {Promise<object|null>} The parsed paper, or `null` when the DOI is empty
     *   after cleaning, the record has no title, or OpenAlex answers 404.
     */
    async getPaperByDoi(doi) {
        const cleanDoi = String(doi ?? '')
            .trim()
            .replace(/^(?:https?:\/\/(?:dx\.)?doi\.org\/|doi:\s*)/i, '');
        if (!cleanDoi) {
            return null;
        }
        try {
            // The work is addressed by its doi.org URL, percent-encoded into a single path segment.
            const workId = encodeURIComponent(`https://doi.org/${cleanDoi}`);
            const response = await this.client.get(`/${workId}`);
            return this.parseWork(response.data);
        }
        catch (error) {
            if (error?.response?.status === 404) {
                return null;
            }
            this.handleHttpError(error, 'getPaperByDoi');
        }
    }

    /**
     * Converts one OpenAlex work record into a paper object.
     *
     * @param {object} item - Raw work record from the API.
     * @returns {object|null} Result of `PaperFactory.create`, or `null` when the record
     *   is not an object or carries neither `title` nor `display_name`.
     */
    parseWork(item) {
        if (!item || typeof item !== 'object') {
            return null;
        }
        const title = item.title || item.display_name || '';
        if (!title) {
            return null;
        }

        const doi = typeof item.doi === 'string'
            ? item.doi.replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, '')
            : '';
        const primaryLocation = item.primary_location || {};
        const openAccess = item.open_access || {};
        const openAlexId = item.id ? String(item.id) : '';
        const url = primaryLocation.landing_page_url || openAlexId || (doi ? `https://doi.org/${doi}` : '');
        const pdfUrl = primaryLocation.pdf_url || openAccess.oa_url || '';
        const concepts = (Array.isArray(item.concepts) ? item.concepts : [])
            .map((concept) => concept?.display_name || '')
            .filter(Boolean);
        const authors = (Array.isArray(item.authorships) ? item.authorships : [])
            .map((authorship) => authorship?.author?.display_name || '')
            .filter(Boolean);
        const publicationDate = typeof item.publication_date === 'string' ? item.publication_date : '';
        // Prefer the year embedded in the date; fall back to the record's publication_year.
        const year = Number(publicationDate.slice(0, 4)) || Number(item.publication_year) || undefined;

        return PaperFactory.create({
            paperId: openAlexId.replace('https://openalex.org/', '') || doi || title,
            title,
            authors,
            abstract: this.reconstructAbstract(item.abstract_inverted_index),
            doi,
            publishedDate: publicationDate ? this.parseDate(publicationDate) : null,
            pdfUrl,
            url,
            source: 'openalex',
            categories: concepts.slice(0, 5),
            citationCount: item.cited_by_count || 0,
            year,
            extra: {
                openAccess: Boolean(openAccess.is_oa),
                oaStatus: openAccess.oa_status || '',
                openAlexId
            }
        });
    }

    /**
     * Rebuilds abstract text from an inverted index.
     *
     * @param {Object<string, number[]>|null|undefined} index - Map of word to the
     *   positions at which it occurs.
     * @returns {string} Words ordered by position and joined with single spaces;
     *   an empty string when no usable index is supplied.
     */
    reconstructAbstract(index) {
        if (!index || typeof index !== 'object') {
            return '';
        }
        const placed = [];
        for (const [word, positions] of Object.entries(index)) {
            if (!Array.isArray(positions)) {
                continue;
            }
            for (const position of positions) {
                placed.push([position, word]);
            }
        }
        return placed
            .sort((a, b) => a[0] - b[0])
            .map(([, word]) => word)
            .join(' ');
    }
}
|
|
114
|
+
//# sourceMappingURL=OpenAlexSearcher.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"OpenAlexSearcher.js","sourceRoot":"","sources":["../../src/platforms/OpenAlexSearcher.ts"],"names":[],"mappings":"AAAA,OAAO,KAAwB,MAAM,OAAO,CAAC;AAC7C,OAAO,EAAS,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAwD,MAAM,kBAAkB,CAAC;AACrG,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AAuB9D,MAAM,OAAO,gBAAiB,SAAQ,WAAW;IAC9B,MAAM,CAAgB;IAEvC;QACE,KAAK,CAAC,UAAU,EAAE,gCAAgC,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,OAAO,EAAE;gBACP,MAAM,EAAE,kBAAkB;gBAC1B,YAAY,EAAE,GAAG,UAAU,YAAY,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,8BAA8B,GAAG;aACxG;SACF,CAAC,CAAC;IACL,CAAC;IAED,eAAe;QACb,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,KAAK;YACf,QAAQ,EAAE,KAAK;YACf,SAAS,EAAE,IAAI;YACf,cAAc,EAAE,KAAK;YACrB,gBAAgB,EAAE,CAAC,YAAY,EAAE,MAAM,CAAC;SACzC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;QACrD,IAAI,CAAC;YACH,MAAM,MAAM,GAA4B;gBACtC,MAAM,EAAE,KAAK;gBACb,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,EAAE,GAAG,CAAC;aAClD,CAAC;YAEF,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;gBACjB,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;gBAChD,IAAI,IAAI,EAAE,CAAC;oBACT,MAAM,CAAC,MAAM,GAAG,yBAAyB,IAAI,8BAA8B,IAAI,QAAQ,CAAC;gBAC1F,CAAC;YACH,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC;YACvD,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;YACnF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,IAAkB,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAY,CAAC;QAC9F,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,QAAgB,EAAE,WAA4B,EAAE;QAChE,MAAM,IAAI,KAAK,CAAC,8EAA8E,CAAC,CAAC;IAClG,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,QAAgB,EAAE,WAA4B,EAAE;QAC9D,MAAM,IAAI,KAAK,CAAC,qFAAqF,CAAC,CAAC;IACzG,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,GAAW;QAC7B,MAAM,QAAQ,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,OAA
O,CAAC,kCAAkC,EAAE,EAAE,CAAC,CAAC;QAC5E,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,kBAAkB,CAAC,mBAAmB,QAAQ,EAAE,CAAC,CAAC;YACjE,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,MAAM,EAAE,CAAC,CAAC;YACrD,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAoB,CAAC,CAAC;QACvD,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,IAAI,KAAK,EAAE,QAAQ,EAAE,MAAM,KAAK,GAAG,EAAE,CAAC;gBACpC,OAAO,IAAI,CAAC;YACd,CAAC;YACD,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAEO,SAAS,CAAC,IAAkB;QAClC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,IAAI,EAAE,CAAC;QACpD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,OAAO,CAAC,wBAAwB,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC;QAClE,MAAM,eAAe,GAAG,IAAI,CAAC,gBAAgB,IAAI,EAAE,CAAC;QACpD,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;QAC1C,MAAM,GAAG,GAAG,eAAe,CAAC,gBAAgB,IAAI,IAAI,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACjG,MAAM,MAAM,GAAG,eAAe,CAAC,OAAO,IAAI,UAAU,CAAC,MAAM,IAAI,EAAE,CAAC;QAClE,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAElG,OAAO,YAAY,CAAC,MAAM,CAAC;YACzB,OAAO,EAAE,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,uBAAuB,EAAE,EAAE,CAAC,IAAI,GAAG,IAAI,KAAK;YAC7E,KAAK;YACL,OAAO,EAAE,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC;iBAC9B,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC,UAAU,CAAC,MAAM,EAAE,YAAY,IAAI,EAAE,CAAC;iBACxD,MAAM,CAAC,OAAO,CAAC;YAClB,QAAQ,EAAE,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,uBAAuB,CAAC;YAChE,GAAG;YACH,aAAa,EAAE,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,IAAI;YACnF,MAAM;YACN,GAAG;YACH,MAAM,EAAE,UAAU;YAClB,UAAU,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC;YAChC,aAAa,EAAE,IAAI,CAAC,cAAc,IAAI,CAAC;YACvC,IAAI,EAAE,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC,CAAC,CAAC,SAAS;YAChG,KAAK,EAAE;gBACL,UAAU,EAAE,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;gBACrC
,QAAQ,EAAE,UAAU,CAAC,SAAS,IAAI,EAAE;gBACpC,UAAU,EAAE,IAAI,CAAC,EAAE,IAAI,EAAE;aAC1B;SACF,CAAC,CAAC;IACL,CAAC;IAEO,mBAAmB,CAAC,KAAgC;QAC1D,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,CAAC;QACtB,MAAM,KAAK,GAA4B,EAAE,CAAC;QAC1C,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YACtD,KAAK,MAAM,QAAQ,IAAI,SAAS,IAAI,EAAE,EAAE,CAAC;gBACvC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC;QACD,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC7E,CAAC;CACF"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { Paper } from '../models/Paper.js';
|
|
2
|
+
import { PaperSource, SearchOptions, DownloadOptions, PlatformCapabilities } from './PaperSource.js';
|
|
3
|
+
/**
 * Paper source backed by PubMed Central via the NCBI E-utilities.
 * Advertises search, PDF download and full-text extraction.
 */
export declare class PMCSearcher extends PaperSource {
|
|
4
|
+
/** HTTP client created in the constructor with the E-utilities endpoint as its base URL. */
private readonly client;
|
|
5
|
+
/** Value sent as the `tool` parameter on E-utilities requests (NCBI_TOOL env var or a built-in default). */
private readonly tool;
|
|
6
|
+
/** Value sent as the `email` parameter on E-utilities requests (NCBI_EMAIL, then CROSSREF_MAILTO, then a default). */
private readonly email;
|
|
7
|
+
/** Takes no arguments; the endpoint, timeout and headers are fixed by the implementation. */
constructor();
|
|
8
|
+
/** Describes which operations this source supports and which search options it honours. */
getCapabilities(): PlatformCapabilities;
|
|
9
|
+
/**
 * Searches the `pmc` database: an esearch call collects ids, then an esummary call
 * fetches their summaries. `options.maxResults` defaults to 10; `options.year` adds a
 * publication-date clause to the query.
 */
search(query: string, options?: SearchOptions): Promise<Paper[]>;
|
|
10
|
+
/**
 * Resolves candidate PDF URLs for the given PMC id and tries them in order until one
 * download succeeds. Rejects when there are no candidates or every candidate fails.
 * `options.savePath` defaults to `./downloads`.
 */
downloadPdf(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
11
|
+
/**
 * Downloads the PDF via `downloadPdf` and returns its extracted text, or a notice
 * naming the downloaded file when no text could be extracted.
 */
readPaper(paperId: string, options?: DownloadOptions): Promise<string>;
|
|
12
|
+
/** Converts one esummary record into a Paper; yields null when `uid` or `title` is missing. */
private parseSummary;
|
|
13
|
+
/** Returns the value of the first `articleids` entry whose `idtype` matches (case-insensitive), or ''. */
private findArticleId;
|
|
14
|
+
/** Combines the Europe PMC and PMC OA candidates into one de-duplicated list, Europe PMC first. */
private resolvePdfUrls;
|
|
15
|
+
/** Asks the Europe PMC REST search for PDF full-text URLs; resolves to [] on any error. */
private resolveViaEuropePmc;
|
|
16
|
+
/** Asks the PMC OA service for PDF links in its XML response; resolves to [] on any error. */
private resolveViaPmcOa;
|
|
17
|
+
/** True for Europe PMC article URLs that carry the `?pdf=render` query. */
private isEuropePmcRenderUrl;
|
|
18
|
+
/** Strips a leading `PMCID:` label and ensures the identifier carries a `PMC` prefix. */
private normalizePmcId;
|
|
19
|
+
}
|
|
20
|
+
//# sourceMappingURL=PMCSearcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PMCSearcher.d.ts","sourceRoot":"","sources":["../../src/platforms/PMCSearcher.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,EAAgB,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,EAAE,MAAM,kBAAkB,CAAC;AAerG,qBAAa,WAAY,SAAQ,WAAW;IAC1C,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;IACvC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAA+C;IACpE,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA2F;;IAcjH,eAAe,IAAI,oBAAoB;IAWjC,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAiCpE,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAqB5E,SAAS,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,eAAoB,GAAG,OAAO,CAAC,MAAM,CAAC;IAMhF,OAAO,CAAC,YAAY;IAuBpB,OAAO,CAAC,aAAa;YAKP,cAAc;YAOd,mBAAmB;YAqCnB,eAAe;IAqB7B,OAAO,CAAC,oBAAoB;IAI5B,OAAO,CAAC,cAAc;CAIvB"}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { PaperFactory } from '../models/Paper.js';
|
|
3
|
+
import { PaperSource } from './PaperSource.js';
|
|
4
|
+
import { TIMEOUTS, USER_AGENT } from '../config/constants.js';
|
|
5
|
+
import { downloadPdfFromUrl, safeFilename } from '../utils/PdfDownload.js';
|
|
6
|
+
import { PDFExtractor } from '../utils/PDFExtractor.js';
|
|
7
|
+
/**
 * Paper source backed by PubMed Central (PMC) through the NCBI E-utilities.
 *
 * Search uses esearch + esummary. PDF candidates are resolved through the
 * Europe PMC REST API and the PMC OA service, then downloaded with the shared
 * PDF helper; full text is extracted from the downloaded PDF.
 */
export class PMCSearcher extends PaperSource {
    /** Axios instance whose base URL is the E-utilities endpoint. */
    client;
    /** Sent as the `tool` parameter on every E-utilities request. */
    tool = process.env.NCBI_TOOL || 'paper-search-cli';
    /** Sent as the `email` parameter on every E-utilities request. */
    email = process.env.NCBI_EMAIL || process.env.CROSSREF_MAILTO || 'paper-search-cli@example.com';

    constructor() {
        super('pmc', 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils');
        this.client = axios.create({
            baseURL: this.baseUrl,
            timeout: TIMEOUTS.DEFAULT,
            headers: {
                Accept: 'application/json',
                'User-Agent': USER_AGENT
            }
        });
    }

    /**
     * Describes what this source can do.
     * @returns {object} Capability flags plus the search options that are honoured.
     */
    getCapabilities() {
        return {
            search: true,
            download: true,
            fullText: true,
            citations: false,
            requiresApiKey: false,
            supportedOptions: ['maxResults', 'year']
        };
    }

    /**
     * Searches the `pmc` database.
     *
     * @param {string} query - Entrez query string.
     * @param {object} [options]
     * @param {number} [options.maxResults=10] - Number of ids to request (at least 1).
     * @param {string|number} [options.year] - `YYYY`, or a range written as
     *   `YYYY-YYYY`, `YYYY-` or `-YYYY`. Ranges are translated to Entrez
     *   `from:to[pdat]` syntax; any other value is passed through as `<value>[pdat]`.
     * @returns {Promise<Array>} Parsed papers in esearch order; summaries lacking a
     *   uid or title are dropped.
     */
    async search(query, options = {}) {
        try {
            const yearText = options.year ? String(options.year).trim() : '';
            const range = /^(\d{4})?\s*-\s*(\d{4})?$/.exec(yearText);
            let term = query;
            if (range && (range[1] || range[2])) {
                // Entrez date ranges use a colon; 1000/3000 stand in for an open bound.
                term = `${query} AND ${range[1] || '1000'}:${range[2] || '3000'}[pdat]`;
            }
            else if (yearText) {
                term = `${query} AND ${yearText}[pdat]`;
            }
            const retmax = Math.max(Math.trunc(Number(options.maxResults) || 10), 1);

            const searchResponse = await this.client.get('/esearch.fcgi', {
                params: {
                    db: 'pmc',
                    term,
                    retmax,
                    retmode: 'json',
                    tool: this.tool,
                    email: this.email
                }
            });
            const idList = searchResponse.data?.esearchresult?.idlist;
            const ids = Array.isArray(idList) ? idList : [];
            if (ids.length === 0) {
                return [];
            }

            const summaryResponse = await this.client.get('/esummary.fcgi', {
                params: {
                    db: 'pmc',
                    id: ids.join(','),
                    retmode: 'json',
                    tool: this.tool,
                    email: this.email
                }
            });
            const result = summaryResponse.data?.result || {};
            return ids.map((id) => this.parseSummary(result[id])).filter(Boolean);
        }
        catch (error) {
            // NOTE(review): handleHttpError is inherited; presumably it rethrows — confirm.
            this.handleHttpError(error, 'search');
        }
    }

    /**
     * Downloads the PDF for a PMC article.
     *
     * @param {string} paperId - PMC identifier, with or without the `PMC` prefix.
     * @param {object} [options]
     * @param {string} [options.savePath='./downloads'] - Target directory.
     * @returns {Promise<string>} Whatever `downloadPdfFromUrl` resolves to for the
     *   first candidate that succeeds.
     * @throws {Error} When no candidate URL is found, or every candidate fails
     *   (the message lists each URL with its failure reason).
     */
    async downloadPdf(paperId, options = {}) {
        const pmcid = this.normalizePmcId(paperId);
        const pdfUrls = await this.resolvePdfUrls(pmcid);
        if (pdfUrls.length === 0) {
            throw new Error(`PMC paper ${pmcid} does not expose a direct downloadable PDF URL. Some PMC viewer PDFs require browser proof-of-work; try Europe PMC, CORE, Unpaywall, or download_with_fallback.`);
        }
        const errors = [];
        // Candidates are tried strictly in order; the first success wins.
        for (const pdfUrl of pdfUrls) {
            try {
                return await downloadPdfFromUrl(pdfUrl, options.savePath || './downloads', `pmc_${safeFilename(pmcid)}`);
            }
            catch (error) {
                errors.push(`${pdfUrl}: ${error?.message || String(error)}`);
            }
        }
        throw new Error(`PMC paper ${pmcid} PDF candidates failed. ${errors.join(' | ')}`);
    }

    /**
     * Downloads the article PDF and extracts its text.
     *
     * @param {string} paperId - PMC identifier.
     * @param {object} [options] - Forwarded to `downloadPdf`.
     * @returns {Promise<string>} Extracted text, or a notice naming the downloaded
     *   file when extraction produced no text.
     */
    async readPaper(paperId, options = {}) {
        const pdfPath = await this.downloadPdf(paperId, options);
        const result = await new PDFExtractor().extractFromFile(pdfPath);
        return result.text || `PDF downloaded to ${pdfPath}, but no text could be extracted.`;
    }

    /**
     * Converts one esummary record into a paper object.
     *
     * @param {object} item - Entry from the esummary `result` map.
     * @returns {object|null} Result of `PaperFactory.create`, or `null` when the
     *   record has no `uid` or no `title`.
     */
    parseSummary(item) {
        if (!item?.uid || !item.title) {
            return null;
        }
        const pmcid = this.findArticleId(item, 'pmc') || this.findArticleId(item, 'pmcid') || `PMC${item.uid}`;
        const normalizedPmcid = this.normalizePmcId(pmcid);
        const doi = this.findArticleId(item, 'doi');
        const journal = item.fulljournalname || item.source || '';
        const authors = (Array.isArray(item.authors) ? item.authors : [])
            .map((author) => author?.name || '')
            .filter(Boolean);
        return PaperFactory.create({
            paperId: normalizedPmcid,
            title: this.cleanText(item.title),
            authors,
            // esummary carries no abstract text.
            abstract: '',
            doi,
            publishedDate: item.pubdate ? this.parseDate(item.pubdate) : null,
            pdfUrl: `https://www.ncbi.nlm.nih.gov/pmc/articles/${normalizedPmcid}/pdf/`,
            url: `https://www.ncbi.nlm.nih.gov/pmc/articles/${normalizedPmcid}/`,
            source: 'pmc',
            journal,
            categories: journal ? [journal] : []
        });
    }

    /**
     * Looks up one identifier in a summary's `articleids` list.
     *
     * @param {object} item - esummary record.
     * @param {string} idType - Identifier type to find (compared case-insensitively).
     * @returns {string} The identifier value, or '' when absent.
     */
    findArticleId(item, idType) {
        const wanted = idType.toLowerCase();
        const articleIds = Array.isArray(item?.articleids) ? item.articleids : [];
        const match = articleIds.find((id) => String(id?.idtype || '').toLowerCase() === wanted);
        return match?.value || '';
    }

    /**
     * Collects candidate PDF URLs from both resolvers.
     *
     * @param {string} pmcid - Normalised PMC identifier.
     * @returns {Promise<string[]>} De-duplicated URLs, Europe PMC candidates first.
     */
    async resolvePdfUrls(pmcid) {
        // The two lookups are independent and each swallows its own errors,
        // so they can run concurrently without changing the result order.
        const [europePmc, pmcOa] = await Promise.all([
            this.resolveViaEuropePmc(pmcid),
            this.resolveViaPmcOa(pmcid)
        ]);
        return [...new Set([...europePmc, ...pmcOa].filter(Boolean))];
    }

    /**
     * Resolves PDF URLs through the Europe PMC REST search.
     *
     * @param {string} pmcid - Normalised PMC identifier.
     * @returns {Promise<string[]>} Direct PDF URLs followed by `?pdf=render` URLs;
     *   an empty list on any failure (this lookup is best-effort).
     */
    async resolveViaEuropePmc(pmcid) {
        try {
            const response = await axios.get('https://www.ebi.ac.uk/europepmc/webservices/rest/search', {
                params: {
                    // Field-qualified so the first hit is the article itself,
                    // not merely a record that mentions the id.
                    query: `PMCID:${pmcid}`,
                    format: 'json',
                    resultType: 'core',
                    pageSize: 1
                },
                timeout: TIMEOUTS.DEFAULT,
                headers: {
                    Accept: 'application/json',
                    'User-Agent': USER_AGENT
                }
            });
            const item = response.data?.resultList?.result?.[0];
            const urls = item?.fullTextUrlList?.fullTextUrl;
            const list = Array.isArray(urls) ? urls : urls ? [urls] : [];
            const direct = [];
            const render = [];
            for (const entry of list) {
                if (String(entry?.documentStyle || '').toLowerCase() !== 'pdf' || !entry?.url) {
                    continue;
                }
                if (this.isEuropePmcRenderUrl(entry.url)) {
                    render.push(entry.url);
                }
                else if (!/^ftp:\/\//i.test(String(entry.url))) {
                    direct.push(entry.url);
                }
            }
            // Render URLs go last so direct PDF links are attempted first.
            return [...direct, ...render];
        }
        catch {
            // Best-effort: a failed lookup just contributes no candidates.
            return [];
        }
    }

    /**
     * Resolves PDF URLs through the PMC OA service.
     *
     * @param {string} pmcid - Normalised PMC identifier.
     * @returns {Promise<string[]>} HTTP(S) PDF links from the XML response;
     *   an empty list on any failure (this lookup is best-effort).
     */
    async resolveViaPmcOa(pmcid) {
        try {
            const response = await axios.get('https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi', {
                params: { id: pmcid },
                timeout: TIMEOUTS.DEFAULT,
                headers: {
                    Accept: 'application/xml,text/xml,*/*',
                    'User-Agent': USER_AGENT
                },
                responseType: 'text'
            });
            const xml = String(response.data || '');
            // Both attributes are matched with lookaheads so their order inside <link> does not matter.
            const linkPattern = /<link\b(?=[^>]*\bformat=["']pdf["'])(?=[^>]*\bhref=["']([^"']+)["'])[^>]*>/gi;
            return Array.from(xml.matchAll(linkPattern), (match) => match[1] || '')
                .map((href) => href.replace(/&amp;/g, '&'))
                // The NCBI FTP host serves the same paths over HTTPS, so rewrite
                // those links instead of discarding them.
                .map((href) => href.replace(/^ftp:\/\/ftp\.ncbi\.nlm\.nih\.gov\//i, 'https://ftp.ncbi.nlm.nih.gov/'))
                .filter((href) => href && !/^ftp:\/\//i.test(href));
        }
        catch {
            // Best-effort: a failed lookup just contributes no candidates.
            return [];
        }
    }

    /**
     * Tells whether a URL is a Europe PMC on-demand PDF render link.
     *
     * @param {string} url
     * @returns {boolean}
     */
    isEuropePmcRenderUrl(url) {
        return /europepmc\.org\/articles\/[^?]+\?pdf=render/i.test(String(url));
    }

    /**
     * Normalises a PMC identifier to the canonical `PMC<digits>` spelling.
     *
     * Surrounding whitespace and a leading `PMCID:` label are removed, and the
     * prefix is always emitted in upper case so URLs and filenames are consistent.
     *
     * @param {string|number} value - e.g. `123`, `pmc123`, ` PMCID: PMC123 `.
     * @returns {string} e.g. `PMC123`.
     */
    normalizePmcId(value) {
        const cleaned = String(value ?? '')
            .trim()
            .replace(/^PMCID:\s*/i, '')
            .trim();
        const suffix = /^PMC/i.test(cleaned) ? cleaned.slice(3).trim() : cleaned;
        return `PMC${suffix}`;
    }
}
|
|
177
|
+
//# sourceMappingURL=PMCSearcher.js.map
|