@ncukondo/search-hub 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_virtual/_commonjsHelpers.js +30 -0
- package/dist/_virtual/_commonjsHelpers.js.map +1 -0
- package/dist/_virtual/aliases.js +5 -0
- package/dist/_virtual/aliases.js.map +1 -0
- package/dist/_virtual/attributes.js +5 -0
- package/dist/_virtual/attributes.js.map +1 -0
- package/dist/_virtual/back.js +5 -0
- package/dist/_virtual/back.js.map +1 -0
- package/dist/_virtual/comment.js +5 -0
- package/dist/_virtual/comment.js.map +1 -0
- package/dist/_virtual/compile.js +5 -0
- package/dist/_virtual/compile.js.map +1 -0
- package/dist/_virtual/compile2.js +5 -0
- package/dist/_virtual/compile2.js.map +1 -0
- package/dist/_virtual/decode-data-html.js +5 -0
- package/dist/_virtual/decode-data-html.js.map +1 -0
- package/dist/_virtual/decode-data-xml.js +5 -0
- package/dist/_virtual/decode-data-xml.js.map +1 -0
- package/dist/_virtual/decode.js +5 -0
- package/dist/_virtual/decode.js.map +1 -0
- package/dist/_virtual/decode_codepoint.js +5 -0
- package/dist/_virtual/decode_codepoint.js.map +1 -0
- package/dist/_virtual/encode-html.js +5 -0
- package/dist/_virtual/encode-html.js.map +1 -0
- package/dist/_virtual/encode.js +5 -0
- package/dist/_virtual/encode.js.map +1 -0
- package/dist/_virtual/escape.js +5 -0
- package/dist/_virtual/escape.js.map +1 -0
- package/dist/_virtual/feeds.js +5 -0
- package/dist/_virtual/feeds.js.map +1 -0
- package/dist/_virtual/filters.js +5 -0
- package/dist/_virtual/filters.js.map +1 -0
- package/dist/_virtual/foreignNames.js +5 -0
- package/dist/_virtual/foreignNames.js.map +1 -0
- package/dist/_virtual/general.js +5 -0
- package/dist/_virtual/general.js.map +1 -0
- package/dist/_virtual/he.js +5 -0
- package/dist/_virtual/he.js.map +1 -0
- package/dist/_virtual/helpers.js +5 -0
- package/dist/_virtual/helpers.js.map +1 -0
- package/dist/_virtual/html.js +5 -0
- package/dist/_virtual/html.js.map +1 -0
- package/dist/_virtual/index.js +6 -0
- package/dist/_virtual/index.js.map +1 -0
- package/dist/_virtual/index10.js +5 -0
- package/dist/_virtual/index10.js.map +1 -0
- package/dist/_virtual/index11.js +5 -0
- package/dist/_virtual/index11.js.map +1 -0
- package/dist/_virtual/index2.js +5 -0
- package/dist/_virtual/index2.js.map +1 -0
- package/dist/_virtual/index3.js +5 -0
- package/dist/_virtual/index3.js.map +1 -0
- package/dist/_virtual/index4.js +5 -0
- package/dist/_virtual/index4.js.map +1 -0
- package/dist/_virtual/index5.js +7 -0
- package/dist/_virtual/index5.js.map +1 -0
- package/dist/_virtual/index6.js +5 -0
- package/dist/_virtual/index6.js.map +1 -0
- package/dist/_virtual/index7.js +5 -0
- package/dist/_virtual/index7.js.map +1 -0
- package/dist/_virtual/index8.js +5 -0
- package/dist/_virtual/index8.js.map +1 -0
- package/dist/_virtual/index9.js +5 -0
- package/dist/_virtual/index9.js.map +1 -0
- package/dist/_virtual/legacy.js +5 -0
- package/dist/_virtual/legacy.js.map +1 -0
- package/dist/_virtual/manipulation.js +5 -0
- package/dist/_virtual/manipulation.js.map +1 -0
- package/dist/_virtual/matcher.js +5 -0
- package/dist/_virtual/matcher.js.map +1 -0
- package/dist/_virtual/node.js +5 -0
- package/dist/_virtual/node.js.map +1 -0
- package/dist/_virtual/node2.js +5 -0
- package/dist/_virtual/node2.js.map +1 -0
- package/dist/_virtual/parse.js +5 -0
- package/dist/_virtual/parse.js.map +1 -0
- package/dist/_virtual/parse2.js +5 -0
- package/dist/_virtual/parse2.js.map +1 -0
- package/dist/_virtual/pseudos.js +5 -0
- package/dist/_virtual/pseudos.js.map +1 -0
- package/dist/_virtual/querying.js +5 -0
- package/dist/_virtual/querying.js.map +1 -0
- package/dist/_virtual/sort.js +5 -0
- package/dist/_virtual/sort.js.map +1 -0
- package/dist/_virtual/stringify.js +5 -0
- package/dist/_virtual/stringify.js.map +1 -0
- package/dist/_virtual/subselects.js +5 -0
- package/dist/_virtual/subselects.js.map +1 -0
- package/dist/_virtual/text.js +5 -0
- package/dist/_virtual/text.js.map +1 -0
- package/dist/_virtual/traversal.js +5 -0
- package/dist/_virtual/traversal.js.map +1 -0
- package/dist/_virtual/type.js +5 -0
- package/dist/_virtual/type.js.map +1 -0
- package/dist/_virtual/valid.js +5 -0
- package/dist/_virtual/valid.js.map +1 -0
- package/dist/_virtual/void-tag.js +5 -0
- package/dist/_virtual/void-tag.js.map +1 -0
- package/dist/cli/commands/fulltext/attach.js +1 -1
- package/dist/cli/commands/fulltext/attach.js.map +1 -1
- package/dist/cli/commands/fulltext/check.d.ts +1 -2
- package/dist/cli/commands/fulltext/check.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/check.js +4 -2
- package/dist/cli/commands/fulltext/check.js.map +1 -1
- package/dist/cli/commands/fulltext/convert.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/convert.js +8 -8
- package/dist/cli/commands/fulltext/convert.js.map +1 -1
- package/dist/cli/commands/fulltext/fetch.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/fetch.js +10 -6
- package/dist/cli/commands/fulltext/fetch.js.map +1 -1
- package/dist/cli/commands/fulltext/index.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/index.js +2 -0
- package/dist/cli/commands/fulltext/index.js.map +1 -1
- package/dist/cli/commands/fulltext/init.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/init.js +6 -5
- package/dist/cli/commands/fulltext/init.js.map +1 -1
- package/dist/cli/commands/fulltext/pending.d.ts +1 -1
- package/dist/cli/commands/fulltext/pending.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/pending.js +4 -2
- package/dist/cli/commands/fulltext/pending.js.map +1 -1
- package/dist/cli/commands/fulltext/status.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/status.js +4 -2
- package/dist/cli/commands/fulltext/status.js.map +1 -1
- package/dist/cli/commands/fulltext/sync.d.ts.map +1 -1
- package/dist/cli/commands/fulltext/sync.js +6 -2
- package/dist/cli/commands/fulltext/sync.js.map +1 -1
- package/dist/cli/commands/review/types.d.ts +1 -1
- package/dist/cli/commands/review/types.d.ts.map +1 -1
- package/dist/cli/commands/review/types.js.map +1 -1
- package/dist/config/schema.d.ts +2 -0
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +6 -0
- package/dist/config/schema.js.map +1 -1
- package/dist/{fulltext → integration}/attach-shared.d.ts +2 -2
- package/dist/integration/attach-shared.d.ts.map +1 -0
- package/dist/integration/attach-shared.js.map +1 -0
- package/dist/integration/fulltext-attach.js +1 -1
- package/dist/integration/fulltext-attach.js.map +1 -1
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/citation-key.js +1 -1
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/citation-key.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/arxiv-html-parser.js +434 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/arxiv-html-parser.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/index.js +93 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/index.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/jats-parser.js +1060 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/jats-parser.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/convert/markdown-writer.js +146 -117
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/convert/markdown-writer.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/arxiv.js +8 -1
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/arxiv.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/core.js +6 -3
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/core.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/index.js +139 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/index.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/ncbi-id-converter.js +46 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/ncbi-id-converter.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/pmc.js +8 -4
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/pmc.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/discovery/unpaywall.js +43 -9
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/discovery/unpaywall.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/arxiv-html.js +48 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/arxiv-html.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/downloader.js +64 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/downloader.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/orchestrator.js +236 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/orchestrator.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/download/pmc-xml.js +2 -1
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/download/pmc-xml.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/meta.js +15 -10
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/meta.js.map +1 -0
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/paths.js.map +1 -0
- package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/readme.js +8 -4
- package/dist/node_modules/@ncukondo/academic-fulltext/dist/readme.js.map +1 -0
- package/dist/node_modules/boolbase/index.js +19 -0
- package/dist/node_modules/boolbase/index.js.map +1 -0
- package/dist/node_modules/css-select/lib/attributes.js +203 -0
- package/dist/node_modules/css-select/lib/attributes.js.map +1 -0
- package/dist/node_modules/css-select/lib/compile.js +141 -0
- package/dist/node_modules/css-select/lib/compile.js.map +1 -0
- package/dist/node_modules/css-select/lib/general.js +154 -0
- package/dist/node_modules/css-select/lib/general.js.map +1 -0
- package/dist/node_modules/css-select/lib/index.js +128 -0
- package/dist/node_modules/css-select/lib/index.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/aliases.js +40 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/aliases.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/filters.js +163 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/filters.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/index.js +71 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/index.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/pseudos.js +93 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/pseudos.js.map +1 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/subselects.js +111 -0
- package/dist/node_modules/css-select/lib/pseudo-selectors/subselects.js.map +1 -0
- package/dist/node_modules/css-select/lib/sort.js +78 -0
- package/dist/node_modules/css-select/lib/sort.js.map +1 -0
- package/dist/node_modules/css-what/lib/es/index.js +12 -0
- package/dist/node_modules/css-what/lib/es/index.js.map +1 -0
- package/dist/node_modules/css-what/lib/es/parse.js +349 -0
- package/dist/node_modules/css-what/lib/es/parse.js.map +1 -0
- package/dist/node_modules/css-what/lib/es/stringify.js +102 -0
- package/dist/node_modules/css-what/lib/es/stringify.js.map +1 -0
- package/dist/node_modules/css-what/lib/es/types.js +37 -0
- package/dist/node_modules/css-what/lib/es/types.js.map +1 -0
- package/dist/node_modules/dom-serializer/lib/foreignNames.js +117 -0
- package/dist/node_modules/dom-serializer/lib/foreignNames.js.map +1 -0
- package/dist/node_modules/dom-serializer/lib/index.js +207 -0
- package/dist/node_modules/dom-serializer/lib/index.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode.js +368 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode_codepoint.js +70 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/decode_codepoint.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/encode.js +61 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/encode.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/escape.js +79 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/escape.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-html.js +18 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-html.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-xml.js +18 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/decode-data-xml.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/encode-html.js +19 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/generated/encode-html.js.map +1 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/index.js +139 -0
- package/dist/node_modules/dom-serializer/node_modules/entities/lib/index.js.map +1 -0
- package/dist/node_modules/domelementtype/lib/index.js +40 -0
- package/dist/node_modules/domelementtype/lib/index.js.map +1 -0
- package/dist/node_modules/domhandler/lib/index.js +167 -0
- package/dist/node_modules/domhandler/lib/index.js.map +1 -0
- package/dist/node_modules/domhandler/lib/node.js +439 -0
- package/dist/node_modules/domhandler/lib/node.js.map +1 -0
- package/dist/node_modules/domutils/lib/feeds.js +146 -0
- package/dist/node_modules/domutils/lib/feeds.js.map +1 -0
- package/dist/node_modules/domutils/lib/helpers.js +97 -0
- package/dist/node_modules/domutils/lib/helpers.js.map +1 -0
- package/dist/node_modules/domutils/lib/index.js +65 -0
- package/dist/node_modules/domutils/lib/index.js.map +1 -0
- package/dist/node_modules/domutils/lib/legacy.js +124 -0
- package/dist/node_modules/domutils/lib/legacy.js.map +1 -0
- package/dist/node_modules/domutils/lib/manipulation.js +107 -0
- package/dist/node_modules/domutils/lib/manipulation.js.map +1 -0
- package/dist/node_modules/domutils/lib/querying.js +102 -0
- package/dist/node_modules/domutils/lib/querying.js.map +1 -0
- package/dist/node_modules/domutils/lib/stringify.js +65 -0
- package/dist/node_modules/domutils/lib/stringify.js.map +1 -0
- package/dist/node_modules/domutils/lib/traversal.js +69 -0
- package/dist/node_modules/domutils/lib/traversal.js.map +1 -0
- package/dist/node_modules/he/he.js +256 -0
- package/dist/node_modules/he/he.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/back.js +16 -0
- package/dist/node_modules/node-html-parser/dist/back.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/index.js +48 -0
- package/dist/node_modules/node-html-parser/dist/index.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/matcher.js +112 -0
- package/dist/node_modules/node-html-parser/dist/matcher.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/comment.js +41 -0
- package/dist/node_modules/node-html-parser/dist/nodes/comment.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/html.js +1048 -0
- package/dist/node_modules/node-html-parser/dist/nodes/html.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/node.js +49 -0
- package/dist/node_modules/node-html-parser/dist/nodes/node.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/text.js +106 -0
- package/dist/node_modules/node-html-parser/dist/nodes/text.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/nodes/type.js +19 -0
- package/dist/node_modules/node-html-parser/dist/nodes/type.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/parse.js +20 -0
- package/dist/node_modules/node-html-parser/dist/parse.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/valid.js +19 -0
- package/dist/node_modules/node-html-parser/dist/valid.js.map +1 -0
- package/dist/node_modules/node-html-parser/dist/void-tag.js +36 -0
- package/dist/node_modules/node-html-parser/dist/void-tag.js.map +1 -0
- package/dist/node_modules/nth-check/lib/compile.js +76 -0
- package/dist/node_modules/nth-check/lib/compile.js.map +1 -0
- package/dist/node_modules/nth-check/lib/index.js +36 -0
- package/dist/node_modules/nth-check/lib/index.js.map +1 -0
- package/dist/node_modules/nth-check/lib/parse.js +69 -0
- package/dist/node_modules/nth-check/lib/parse.js.map +1 -0
- package/package.json +2 -2
- package/dist/fulltext/attach-shared.d.ts.map +0 -1
- package/dist/fulltext/attach-shared.js.map +0 -1
- package/dist/fulltext/citation-key.d.ts +0 -15
- package/dist/fulltext/citation-key.d.ts.map +0 -1
- package/dist/fulltext/citation-key.js.map +0 -1
- package/dist/fulltext/convert/index.d.ts +0 -20
- package/dist/fulltext/convert/index.d.ts.map +0 -1
- package/dist/fulltext/convert/index.js +0 -50
- package/dist/fulltext/convert/index.js.map +0 -1
- package/dist/fulltext/convert/jats-parser.d.ts +0 -36
- package/dist/fulltext/convert/jats-parser.d.ts.map +0 -1
- package/dist/fulltext/convert/jats-parser.js +0 -887
- package/dist/fulltext/convert/jats-parser.js.map +0 -1
- package/dist/fulltext/convert/markdown-writer.d.ts +0 -6
- package/dist/fulltext/convert/markdown-writer.d.ts.map +0 -1
- package/dist/fulltext/convert/markdown-writer.js.map +0 -1
- package/dist/fulltext/convert/types.d.ts +0 -141
- package/dist/fulltext/convert/types.d.ts.map +0 -1
- package/dist/fulltext/discovery/arxiv.d.ts +0 -11
- package/dist/fulltext/discovery/arxiv.d.ts.map +0 -1
- package/dist/fulltext/discovery/arxiv.js.map +0 -1
- package/dist/fulltext/discovery/core.d.ts +0 -11
- package/dist/fulltext/discovery/core.d.ts.map +0 -1
- package/dist/fulltext/discovery/core.js.map +0 -1
- package/dist/fulltext/discovery/index.d.ts +0 -28
- package/dist/fulltext/discovery/index.d.ts.map +0 -1
- package/dist/fulltext/discovery/index.js +0 -75
- package/dist/fulltext/discovery/index.js.map +0 -1
- package/dist/fulltext/discovery/pmc.d.ts +0 -19
- package/dist/fulltext/discovery/pmc.d.ts.map +0 -1
- package/dist/fulltext/discovery/pmc.js.map +0 -1
- package/dist/fulltext/discovery/unpaywall.d.ts +0 -11
- package/dist/fulltext/discovery/unpaywall.d.ts.map +0 -1
- package/dist/fulltext/discovery/unpaywall.js.map +0 -1
- package/dist/fulltext/download/downloader.d.ts +0 -21
- package/dist/fulltext/download/downloader.d.ts.map +0 -1
- package/dist/fulltext/download/downloader.js +0 -59
- package/dist/fulltext/download/downloader.js.map +0 -1
- package/dist/fulltext/download/orchestrator.d.ts +0 -33
- package/dist/fulltext/download/orchestrator.d.ts.map +0 -1
- package/dist/fulltext/download/orchestrator.js +0 -125
- package/dist/fulltext/download/orchestrator.js.map +0 -1
- package/dist/fulltext/download/pmc-xml.d.ts +0 -13
- package/dist/fulltext/download/pmc-xml.d.ts.map +0 -1
- package/dist/fulltext/download/pmc-xml.js.map +0 -1
- package/dist/fulltext/meta.d.ts +0 -25
- package/dist/fulltext/meta.d.ts.map +0 -1
- package/dist/fulltext/meta.js.map +0 -1
- package/dist/fulltext/paths.d.ts +0 -12
- package/dist/fulltext/paths.d.ts.map +0 -1
- package/dist/fulltext/paths.js.map +0 -1
- package/dist/fulltext/readme.d.ts +0 -4
- package/dist/fulltext/readme.d.ts.map +0 -1
- package/dist/fulltext/readme.js.map +0 -1
- package/dist/fulltext/types.d.ts +0 -90
- package/dist/fulltext/types.d.ts.map +0 -1
- /package/dist/{fulltext → integration}/attach-shared.js +0 -0
- /package/dist/{fulltext → node_modules/@ncukondo/academic-fulltext/dist}/paths.js +0 -0
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* PDF downloader with retry and error handling.
|
|
3
|
-
*/
|
|
4
|
-
export interface DownloadOptions {
|
|
5
|
-
/** Number of retry attempts (default: 3) */
|
|
6
|
-
retries?: number;
|
|
7
|
-
/** Base delay between retries in ms (default: 1000) */
|
|
8
|
-
retryDelay?: number;
|
|
9
|
-
}
|
|
10
|
-
export interface DownloadResult {
|
|
11
|
-
success: boolean;
|
|
12
|
-
size?: number;
|
|
13
|
-
error?: string;
|
|
14
|
-
}
|
|
15
|
-
/**
|
|
16
|
-
* Download a PDF from a URL to a local file path.
|
|
17
|
-
* Retries on network errors and 429 responses with exponential backoff.
|
|
18
|
-
* Does not retry on 403/404 or other client errors.
|
|
19
|
-
*/
|
|
20
|
-
export declare function downloadPdf(url: string, destPath: string, options?: DownloadOptions): Promise<DownloadResult>;
|
|
21
|
-
//# sourceMappingURL=downloader.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"downloader.d.ts","sourceRoot":"","sources":["../../../src/fulltext/download/downloader.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,MAAM,WAAW,eAAe;IAC9B,4CAA4C;IAC5C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAoBD;;;;GAIG;AACH,wBAAsB,WAAW,CAC/B,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,eAAe,GACxB,OAAO,CAAC,cAAc,CAAC,CAoDzB"}
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
import { mkdir, writeFile } from "node:fs/promises";
|
|
2
|
-
import { dirname } from "node:path";
|
|
3
|
-
const NON_RETRYABLE_STATUSES = /* @__PURE__ */ new Set([400, 401, 403, 404, 405, 410]);
|
|
4
|
-
const VALID_CONTENT_TYPES = ["application/pdf", "application/octet-stream"];
|
|
5
|
-
const USER_AGENT = "search-hub/0.8.0 (https://github.com/ncukondo/search-hub)";
|
|
6
|
-
function isValidPdfContentType(contentType) {
|
|
7
|
-
if (!contentType) return false;
|
|
8
|
-
const base = (contentType.split(";")[0] ?? "").trim().toLowerCase();
|
|
9
|
-
return VALID_CONTENT_TYPES.includes(base);
|
|
10
|
-
}
|
|
11
|
-
function sleep(ms) {
|
|
12
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
13
|
-
}
|
|
14
|
-
async function downloadPdf(url, destPath, options) {
|
|
15
|
-
const retries = options?.retries ?? 3;
|
|
16
|
-
const retryDelay = options?.retryDelay ?? 1e3;
|
|
17
|
-
let lastError;
|
|
18
|
-
for (let attempt = 1; attempt <= retries; attempt++) {
|
|
19
|
-
try {
|
|
20
|
-
const response = await fetch(url, {
|
|
21
|
-
headers: { "User-Agent": USER_AGENT }
|
|
22
|
-
});
|
|
23
|
-
if (!response.ok) {
|
|
24
|
-
const status = response.status;
|
|
25
|
-
if (NON_RETRYABLE_STATUSES.has(status)) {
|
|
26
|
-
return { success: false, error: `HTTP ${status} ${response.statusText}` };
|
|
27
|
-
}
|
|
28
|
-
lastError = `HTTP ${status} ${response.statusText}`;
|
|
29
|
-
if (attempt < retries) {
|
|
30
|
-
await sleep(retryDelay * attempt);
|
|
31
|
-
continue;
|
|
32
|
-
}
|
|
33
|
-
return { success: false, error: lastError };
|
|
34
|
-
}
|
|
35
|
-
const contentType = response.headers.get("content-type");
|
|
36
|
-
if (!isValidPdfContentType(contentType)) {
|
|
37
|
-
return {
|
|
38
|
-
success: false,
|
|
39
|
-
error: `Unexpected Content-Type: ${contentType ?? "none"}`
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
const buffer = await response.arrayBuffer();
|
|
43
|
-
await mkdir(dirname(destPath), { recursive: true });
|
|
44
|
-
await writeFile(destPath, Buffer.from(buffer));
|
|
45
|
-
return { success: true, size: buffer.byteLength };
|
|
46
|
-
} catch (err) {
|
|
47
|
-
lastError = err instanceof Error ? err.message : String(err);
|
|
48
|
-
if (attempt < retries) {
|
|
49
|
-
await sleep(retryDelay * attempt);
|
|
50
|
-
continue;
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
}
|
|
54
|
-
return { success: false, error: lastError ?? "Download failed" };
|
|
55
|
-
}
|
|
56
|
-
export {
|
|
57
|
-
downloadPdf
|
|
58
|
-
};
|
|
59
|
-
//# sourceMappingURL=downloader.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"downloader.js","sources":["../../../src/fulltext/download/downloader.ts"],"sourcesContent":["/**\n * PDF downloader with retry and error handling.\n */\n\nimport { writeFile, mkdir } from 'node:fs/promises';\nimport { dirname } from 'node:path';\n\nexport interface DownloadOptions {\n /** Number of retry attempts (default: 3) */\n retries?: number;\n /** Base delay between retries in ms (default: 1000) */\n retryDelay?: number;\n}\n\nexport interface DownloadResult {\n success: boolean;\n size?: number;\n error?: string;\n}\n\n/** HTTP status codes that should not be retried */\nconst NON_RETRYABLE_STATUSES = new Set([400, 401, 403, 404, 405, 410]);\n\n/** Content types accepted as valid PDF responses */\nconst VALID_CONTENT_TYPES = ['application/pdf', 'application/octet-stream'];\n\nconst USER_AGENT = 'search-hub/0.8.0 (https://github.com/ncukondo/search-hub)';\n\nfunction isValidPdfContentType(contentType: string | null): boolean {\n if (!contentType) return false;\n const base = (contentType.split(';')[0] ?? '').trim().toLowerCase();\n return VALID_CONTENT_TYPES.includes(base);\n}\n\nfunction sleep(ms: number): Promise<void> {\n return new Promise((resolve) => setTimeout(resolve, ms));\n}\n\n/**\n * Download a PDF from a URL to a local file path.\n * Retries on network errors and 429 responses with exponential backoff.\n * Does not retry on 403/404 or other client errors.\n */\nexport async function downloadPdf(\n url: string,\n destPath: string,\n options?: DownloadOptions,\n): Promise<DownloadResult> {\n const retries = options?.retries ?? 3;\n const retryDelay = options?.retryDelay ?? 1000;\n\n let lastError: string | undefined;\n\n for (let attempt = 1; attempt <= retries; attempt++) {\n try {\n const response = await fetch(url, {\n headers: { 'User-Agent': USER_AGENT },\n });\n\n if (!response.ok) {\n const status = response.status;\n if (NON_RETRYABLE_STATUSES.has(status)) {\n return { success: false, error: `HTTP ${status} ${response.statusText}` };\n }\n // Retryable status (429, 5xx)\n lastError = `HTTP ${status} ${response.statusText}`;\n if (attempt < retries) {\n await sleep(retryDelay * attempt);\n continue;\n }\n return { success: false, error: lastError };\n }\n\n // Validate content type\n const contentType = response.headers.get('content-type');\n if (!isValidPdfContentType(contentType)) {\n return {\n success: false,\n error: `Unexpected Content-Type: ${contentType ?? 'none'}`,\n };\n }\n\n const buffer = await response.arrayBuffer();\n\n // Ensure parent directory exists\n await mkdir(dirname(destPath), { recursive: true });\n await writeFile(destPath, Buffer.from(buffer));\n\n return { success: true, size: buffer.byteLength };\n } catch (err) {\n lastError = err instanceof Error ? err.message : String(err);\n if (attempt < retries) {\n await sleep(retryDelay * attempt);\n continue;\n }\n }\n }\n\n return { success: false, error: lastError ?? 'Download failed' };\n}\n"],"names":[],"mappings":";;AAqBA,MAAM,yBAAyB,oBAAI,IAAI,CAAC,KAAK,KAAK,KAAK,KAAK,KAAK,GAAG,CAAC;AAGrE,MAAM,sBAAsB,CAAC,mBAAmB,0BAA0B;AAE1E,MAAM,aAAa;AAEnB,SAAS,sBAAsB,aAAqC;AAClE,MAAI,CAAC,YAAa,QAAO;AACzB,QAAM,QAAQ,YAAY,MAAM,GAAG,EAAE,CAAC,KAAK,IAAI,KAAA,EAAO,YAAA;AACtD,SAAO,oBAAoB,SAAS,IAAI;AAC1C;AAEA,SAAS,MAAM,IAA2B;AACxC,SAAO,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,EAAE,CAAC;AACzD;AAOA,eAAsB,YACpB,KACA,UACA,SACyB;AACzB,QAAM,UAAU,SAAS,WAAW;AACpC,QAAM,aAAa,SAAS,cAAc;AAE1C,MAAI;AAEJ,WAAS,UAAU,GAAG,WAAW,SAAS,WAAW;AACnD,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,KAAK;AAAA,QAChC,SAAS,EAAE,cAAc,WAAA;AAAA,MAAW,CACrC;AAED,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,SAAS,SAAS;AACxB,YAAI,uBAAuB,IAAI,MAAM,GAAG;AACtC,iBAAO,EAAE,SAAS,OAAO,OAAO,QAAQ,MAAM,IAAI,SAAS,UAAU,GAAA;AAAA,QACvE;AAEA,oBAAY,QAAQ,MAAM,IAAI,SAAS,UAAU;AACjD,YAAI,UAAU,SAAS;AACrB,gBAAM,MAAM,aAAa,OAAO;AAChC;AAAA,QACF;AACA,eAAO,EAAE,SAAS,OAAO,OAAO,UAAA;AAAA,MAClC;AAGA,YAAM,cAAc,SAAS,QAAQ,IAAI,cAAc;AACvD,UAAI,CAAC,sBAAsB,WAAW,GAAG;AACvC,eAAO;AAAA,UACL,SAAS;AAAA,UACT,OAAO,4BAA4B,eAAe,MAAM;AAAA,QAAA;AAAA,MAE5D;AAEA,YAAM,SAAS,MAAM,SAAS,YAAA;AAG9B,YAAM,MAAM,QAAQ,QAAQ,GAAG,EAAE,WAAW,MAAM;AAClD,YAAM,UAAU,UAAU,OAAO,KAAK,MAAM,CAAC;AAE7C,aAAO,EAAE,SAAS,MAAM,MAAM,OAAO,WAAA;AAAA,IACvC,SAAS,KAAK;AACZ,kBAAY,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC3D,UAAI,UAAU,SAAS;AACrB,cAAM,MAAM,aAAa,OAAO;AAChC;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO,EAAE,SAAS,OAAO,OAAO,aAAa,kBAAA;AAC/C;"}
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import { OALocation } from '../types';
|
|
2
|
-
export interface FetchArticle {
|
|
3
|
-
dirName: string;
|
|
4
|
-
oaLocations: OALocation[];
|
|
5
|
-
pmcid?: string;
|
|
6
|
-
}
|
|
7
|
-
export interface FetchOptions {
|
|
8
|
-
concurrency?: number;
|
|
9
|
-
retries?: number;
|
|
10
|
-
retryDelay?: number;
|
|
11
|
-
onProgress?: (progress: {
|
|
12
|
-
completed: number;
|
|
13
|
-
total: number;
|
|
14
|
-
dirName: string;
|
|
15
|
-
}) => void;
|
|
16
|
-
sourceFilter?: string[];
|
|
17
|
-
}
|
|
18
|
-
export interface FetchResult {
|
|
19
|
-
dirName: string;
|
|
20
|
-
status: 'downloaded' | 'failed' | 'skipped';
|
|
21
|
-
filesDownloaded?: string[];
|
|
22
|
-
error?: string;
|
|
23
|
-
}
|
|
24
|
-
/**
|
|
25
|
-
* Fetch fulltext for a single article.
|
|
26
|
-
* Downloads PDF from the best available source, plus PMC XML if available.
|
|
27
|
-
*/
|
|
28
|
-
export declare function fetchFulltext(article: FetchArticle, sessionDir: string, options?: FetchOptions): Promise<FetchResult>;
|
|
29
|
-
/**
|
|
30
|
-
* Fetch fulltexts for multiple articles with concurrency control.
|
|
31
|
-
*/
|
|
32
|
-
export declare function fetchAllFulltexts(articles: FetchArticle[], sessionDir: string, options?: FetchOptions): Promise<FetchResult[]>;
|
|
33
|
-
//# sourceMappingURL=orchestrator.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"orchestrator.d.ts","sourceRoot":"","sources":["../../../src/fulltext/download/orchestrator.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH,OAAO,KAAK,EAAE,UAAU,EAA0B,MAAM,UAAU,CAAC;AASnE,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,UAAU,EAAE,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,YAAY;IAC3B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,CAAC,QAAQ,EAAE;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,KAAK,IAAI,CAAC;IACvF,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,YAAY,GAAG,QAAQ,GAAG,SAAS,CAAC;IAC5C,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAgBD;;;GAGG;AACH,wBAAsB,aAAa,CACjC,OAAO,EAAE,YAAY,EACrB,UAAU,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,WAAW,CAAC,CA4FtB;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,QAAQ,EAAE,YAAY,EAAE,EACxB,UAAU,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,WAAW,EAAE,CAAC,CAgCxB"}
|
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
import { mkdir } from "node:fs/promises";
|
|
2
|
-
import { join } from "node:path";
|
|
3
|
-
import { downloadPdf } from "./downloader.js";
|
|
4
|
-
import { downloadPmcXml } from "./pmc-xml.js";
|
|
5
|
-
import { loadMeta, saveMeta } from "../meta.js";
|
|
6
|
-
import { getArticleDir } from "../paths.js";
|
|
7
|
-
const SOURCE_PRIORITY = ["pmc", "arxiv", "unpaywall", "core", "publisher"];
|
|
8
|
-
function sortByPriority(locations) {
|
|
9
|
-
return [...locations].sort((a, b) => {
|
|
10
|
-
const aIdx = SOURCE_PRIORITY.indexOf(a.source);
|
|
11
|
-
const bIdx = SOURCE_PRIORITY.indexOf(b.source);
|
|
12
|
-
return (aIdx === -1 ? 999 : aIdx) - (bIdx === -1 ? 999 : bIdx);
|
|
13
|
-
});
|
|
14
|
-
}
|
|
15
|
-
function getPdfLocations(locations) {
|
|
16
|
-
return locations.filter((loc) => loc.urlType === "pdf");
|
|
17
|
-
}
|
|
18
|
-
async function fetchFulltext(article, sessionDir, options) {
|
|
19
|
-
const articleDir = getArticleDir(sessionDir, article.dirName);
|
|
20
|
-
const metaPath = join(articleDir, "meta.json");
|
|
21
|
-
let meta;
|
|
22
|
-
try {
|
|
23
|
-
meta = await loadMeta(metaPath);
|
|
24
|
-
} catch {
|
|
25
|
-
return { dirName: article.dirName, status: "failed", error: "meta.json not found" };
|
|
26
|
-
}
|
|
27
|
-
if (meta.files.pdf) {
|
|
28
|
-
return { dirName: article.dirName, status: "skipped" };
|
|
29
|
-
}
|
|
30
|
-
await mkdir(articleDir, { recursive: true });
|
|
31
|
-
const filesDownloaded = [];
|
|
32
|
-
let pdfFileInfo;
|
|
33
|
-
let xmlFileInfo;
|
|
34
|
-
let locations = article.oaLocations;
|
|
35
|
-
if (options?.sourceFilter && options.sourceFilter.length > 0) {
|
|
36
|
-
locations = locations.filter((loc) => options.sourceFilter?.includes(loc.source));
|
|
37
|
-
}
|
|
38
|
-
const pdfLocations = sortByPriority(getPdfLocations(locations));
|
|
39
|
-
for (const loc of pdfLocations) {
|
|
40
|
-
const pdfPath = join(articleDir, "fulltext.pdf");
|
|
41
|
-
const downloadResult = await downloadPdf(loc.url, pdfPath, {
|
|
42
|
-
retries: options?.retries ?? 3,
|
|
43
|
-
retryDelay: options?.retryDelay ?? 1e3
|
|
44
|
-
});
|
|
45
|
-
if (downloadResult.success) {
|
|
46
|
-
filesDownloaded.push("fulltext.pdf");
|
|
47
|
-
const info = {
|
|
48
|
-
filename: "fulltext.pdf",
|
|
49
|
-
source: loc.source,
|
|
50
|
-
retrievedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
51
|
-
};
|
|
52
|
-
if (downloadResult.size !== void 0) info.size = downloadResult.size;
|
|
53
|
-
pdfFileInfo = info;
|
|
54
|
-
break;
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
if (article.pmcid) {
|
|
58
|
-
const xmlPath = join(articleDir, "fulltext.xml");
|
|
59
|
-
const xmlResult = await downloadPmcXml(article.pmcid, xmlPath);
|
|
60
|
-
if (xmlResult.success) {
|
|
61
|
-
filesDownloaded.push("fulltext.xml");
|
|
62
|
-
const info = {
|
|
63
|
-
filename: "fulltext.xml",
|
|
64
|
-
source: "pmc",
|
|
65
|
-
retrievedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
66
|
-
};
|
|
67
|
-
if (xmlResult.size !== void 0) info.size = xmlResult.size;
|
|
68
|
-
xmlFileInfo = info;
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
if (filesDownloaded.length === 0) {
|
|
72
|
-
return {
|
|
73
|
-
dirName: article.dirName,
|
|
74
|
-
status: "failed",
|
|
75
|
-
error: "All download sources failed"
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
const updatedMeta = {
|
|
79
|
-
...meta,
|
|
80
|
-
files: {
|
|
81
|
-
...meta.files,
|
|
82
|
-
...pdfFileInfo ? { pdf: pdfFileInfo } : {},
|
|
83
|
-
...xmlFileInfo ? { xml: xmlFileInfo } : {}
|
|
84
|
-
}
|
|
85
|
-
};
|
|
86
|
-
await saveMeta(metaPath, updatedMeta);
|
|
87
|
-
return {
|
|
88
|
-
dirName: article.dirName,
|
|
89
|
-
status: "downloaded",
|
|
90
|
-
filesDownloaded
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
|
-
async function fetchAllFulltexts(articles, sessionDir, options) {
|
|
94
|
-
const concurrency = options?.concurrency ?? 3;
|
|
95
|
-
const results = new Array(articles.length);
|
|
96
|
-
let nextIndex = 0;
|
|
97
|
-
let completed = 0;
|
|
98
|
-
async function worker() {
|
|
99
|
-
while (nextIndex < articles.length) {
|
|
100
|
-
const index = nextIndex++;
|
|
101
|
-
const article = articles[index];
|
|
102
|
-
if (!article) continue;
|
|
103
|
-
results[index] = await fetchFulltext(article, sessionDir, options);
|
|
104
|
-
completed++;
|
|
105
|
-
if (options?.onProgress) {
|
|
106
|
-
options.onProgress({
|
|
107
|
-
completed,
|
|
108
|
-
total: articles.length,
|
|
109
|
-
dirName: article.dirName
|
|
110
|
-
});
|
|
111
|
-
}
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
const workers = Array.from(
|
|
115
|
-
{ length: Math.min(concurrency, articles.length) },
|
|
116
|
-
() => worker()
|
|
117
|
-
);
|
|
118
|
-
await Promise.all(workers);
|
|
119
|
-
return results;
|
|
120
|
-
}
|
|
121
|
-
export {
|
|
122
|
-
fetchAllFulltexts,
|
|
123
|
-
fetchFulltext
|
|
124
|
-
};
|
|
125
|
-
//# sourceMappingURL=orchestrator.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"orchestrator.js","sources":["../../../src/fulltext/download/orchestrator.ts"],"sourcesContent":["/**\n * Fulltext fetch orchestrator.\n * Coordinates downloads from multiple OA sources with priority-based selection.\n */\n\nimport { mkdir } from 'node:fs/promises';\nimport { join } from 'node:path';\nimport type { OALocation, FulltextMeta, FileInfo } from '../types';\nimport { downloadPdf } from './downloader';\nimport { downloadPmcXml } from './pmc-xml';\nimport { loadMeta, saveMeta } from '../meta';\nimport { getArticleDir } from '../paths';\n\n/** Source priority order (lower index = higher priority) */\nconst SOURCE_PRIORITY: string[] = ['pmc', 'arxiv', 'unpaywall', 'core', 'publisher'];\n\nexport interface FetchArticle {\n dirName: string;\n oaLocations: OALocation[];\n pmcid?: string;\n}\n\nexport interface FetchOptions {\n concurrency?: number;\n retries?: number;\n retryDelay?: number;\n onProgress?: (progress: { completed: number; total: number; dirName: string }) => void;\n sourceFilter?: string[];\n}\n\nexport interface FetchResult {\n dirName: string;\n status: 'downloaded' | 'failed' | 'skipped';\n filesDownloaded?: string[];\n error?: string;\n}\n\n/** Sort OA locations by source priority */\nfunction sortByPriority(locations: OALocation[]): OALocation[] {\n return [...locations].sort((a, b) => {\n const aIdx = SOURCE_PRIORITY.indexOf(a.source);\n const bIdx = SOURCE_PRIORITY.indexOf(b.source);\n return (aIdx === -1 ? 999 : aIdx) - (bIdx === -1 ? 999 : bIdx);\n });\n}\n\n/** Get only PDF-type locations */\nfunction getPdfLocations(locations: OALocation[]): OALocation[] {\n return locations.filter((loc) => loc.urlType === 'pdf');\n}\n\n/**\n * Fetch fulltext for a single article.\n * Downloads PDF from the best available source, plus PMC XML if available.\n */\nexport async function fetchFulltext(\n article: FetchArticle,\n sessionDir: string,\n options?: FetchOptions,\n): Promise<FetchResult> {\n const articleDir = getArticleDir(sessionDir, article.dirName);\n const metaPath = join(articleDir, 'meta.json');\n\n // Load meta to check existing files\n let meta: FulltextMeta;\n try {\n meta = await loadMeta(metaPath);\n } catch {\n return { dirName: article.dirName, status: 'failed', error: 'meta.json not found' };\n }\n\n // Skip if already has PDF\n if (meta.files.pdf) {\n return { dirName: article.dirName, status: 'skipped' };\n }\n\n // Ensure directory exists\n await mkdir(articleDir, { recursive: true });\n\n const filesDownloaded: string[] = [];\n let pdfFileInfo: FileInfo | undefined;\n let xmlFileInfo: FileInfo | undefined;\n\n // Filter and sort locations by priority\n let locations = article.oaLocations;\n if (options?.sourceFilter && options.sourceFilter.length > 0) {\n locations = locations.filter((loc) => options.sourceFilter?.includes(loc.source));\n }\n const pdfLocations = sortByPriority(getPdfLocations(locations));\n\n // Try downloading PDF from best source, falling back to next on failure\n for (const loc of pdfLocations) {\n const pdfPath = join(articleDir, 'fulltext.pdf');\n const downloadResult = await downloadPdf(loc.url, pdfPath, {\n retries: options?.retries ?? 3,\n retryDelay: options?.retryDelay ?? 1000,\n });\n\n if (downloadResult.success) {\n filesDownloaded.push('fulltext.pdf');\n const info: FileInfo = {\n filename: 'fulltext.pdf',\n source: loc.source,\n retrievedAt: new Date().toISOString(),\n };\n if (downloadResult.size !== undefined) info.size = downloadResult.size;\n pdfFileInfo = info;\n break;\n }\n }\n\n // Download PMC XML if pmcid available\n if (article.pmcid) {\n const xmlPath = join(articleDir, 'fulltext.xml');\n const xmlResult = await downloadPmcXml(article.pmcid, xmlPath);\n if (xmlResult.success) {\n filesDownloaded.push('fulltext.xml');\n const info: FileInfo = {\n filename: 'fulltext.xml',\n source: 'pmc',\n retrievedAt: new Date().toISOString(),\n };\n if (xmlResult.size !== undefined) info.size = xmlResult.size;\n xmlFileInfo = info;\n }\n }\n\n if (filesDownloaded.length === 0) {\n return {\n dirName: article.dirName,\n status: 'failed',\n error: 'All download sources failed',\n };\n }\n\n // Update meta.json with new file info\n const updatedMeta: FulltextMeta = {\n ...meta,\n files: {\n ...meta.files,\n ...(pdfFileInfo ? { pdf: pdfFileInfo } : {}),\n ...(xmlFileInfo ? { xml: xmlFileInfo } : {}),\n },\n };\n await saveMeta(metaPath, updatedMeta);\n\n return {\n dirName: article.dirName,\n status: 'downloaded',\n filesDownloaded,\n };\n}\n\n/**\n * Fetch fulltexts for multiple articles with concurrency control.\n */\nexport async function fetchAllFulltexts(\n articles: FetchArticle[],\n sessionDir: string,\n options?: FetchOptions,\n): Promise<FetchResult[]> {\n const concurrency = options?.concurrency ?? 3;\n const results: FetchResult[] = new Array(articles.length);\n let nextIndex = 0;\n let completed = 0;\n\n async function worker(): Promise<void> {\n while (nextIndex < articles.length) {\n const index = nextIndex++;\n const article = articles[index];\n if (!article) continue;\n\n results[index] = await fetchFulltext(article, sessionDir, options);\n completed++;\n\n if (options?.onProgress) {\n options.onProgress({\n completed,\n total: articles.length,\n dirName: article.dirName,\n });\n }\n }\n }\n\n const workers = Array.from(\n { length: Math.min(concurrency, articles.length) },\n () => worker(),\n );\n await Promise.all(workers);\n\n return results;\n}\n"],"names":[],"mappings":";;;;;;AAcA,MAAM,kBAA4B,CAAC,OAAO,SAAS,aAAa,QAAQ,WAAW;AAwBnF,SAAS,eAAe,WAAuC;AAC7D,SAAO,CAAC,GAAG,SAAS,EAAE,KAAK,CAAC,GAAG,MAAM;AACnC,UAAM,OAAO,gBAAgB,QAAQ,EAAE,MAAM;AAC7C,UAAM,OAAO,gBAAgB,QAAQ,EAAE,MAAM;AAC7C,YAAQ,SAAS,KAAK,MAAM,SAAS,SAAS,KAAK,MAAM;AAAA,EAC3D,CAAC;AACH;AAGA,SAAS,gBAAgB,WAAuC;AAC9D,SAAO,UAAU,OAAO,CAAC,QAAQ,IAAI,YAAY,KAAK;AACxD;AAMA,eAAsB,cACpB,SACA,YACA,SACsB;AACtB,QAAM,aAAa,cAAc,YAAY,QAAQ,OAAO;AAC5D,QAAM,WAAW,KAAK,YAAY,WAAW;AAG7C,MAAI;AACJ,MAAI;AACF,WAAO,MAAM,SAAS,QAAQ;AAAA,EAChC,QAAQ;AACN,WAAO,EAAE,SAAS,QAAQ,SAAS,QAAQ,UAAU,OAAO,sBAAA;AAAA,EAC9D;AAGA,MAAI,KAAK,MAAM,KAAK;AAClB,WAAO,EAAE,SAAS,QAAQ,SAAS,QAAQ,UAAA;AAAA,EAC7C;AAGA,QAAM,MAAM,YAAY,EAAE,WAAW,MAAM;AAE3C,QAAM,kBAA4B,CAAA;AAClC,MAAI;AACJ,MAAI;AAGJ,MAAI,YAAY,QAAQ;AACxB,MAAI,SAAS,gBAAgB,QAAQ,aAAa,SAAS,GAAG;AAC5D,gBAAY,UAAU,OAAO,CAAC,QAAQ,QAAQ,cAAc,SAAS,IAAI,MAAM,CAAC;AAAA,EAClF;AACA,QAAM,eAAe,eAAe,gBAAgB,SAAS,CAAC;AAG9D,aAAW,OAAO,cAAc;AAC9B,UAAM,UAAU,KAAK,YAAY,cAAc;AAC/C,UAAM,iBAAiB,MAAM,YAAY,IAAI,KAAK,SAAS;AAAA,MACzD,SAAS,SAAS,WAAW;AAAA,MAC7B,YAAY,SAAS,cAAc;AAAA,IAAA,CACpC;AAED,QAAI,eAAe,SAAS;AAC1B,sBAAgB,KAAK,cAAc;AACnC,YAAM,OAAiB;AAAA,QACrB,UAAU;AAAA,QACV,QAAQ,IAAI;AAAA,QACZ,cAAa,oBAAI,KAAA,GAAO,YAAA;AAAA,MAAY;AAEtC,UAAI,eAAe,SAAS,OAAW,MAAK,OAAO,eAAe;AAClE,oBAAc;AACd;AAAA,IACF;AAAA,EACF;AAGA,MAAI,QAAQ,OAAO;AACjB,UAAM,UAAU,KAAK,YAAY,cAAc;AAC/C,UAAM,YAAY,MAAM,eAAe,QAAQ,OAAO,OAAO;AAC7D,QAAI,UAAU,SAAS;AACrB,sBAAgB,KAAK,cAAc;AACnC,YAAM,OAAiB;AAAA,QACrB,UAAU;AAAA,QACV,QAAQ;AAAA,QACR,cAAa,oBAAI,KAAA,GAAO,YAAA;AAAA,MAAY;AAEtC,UAAI,UAAU,SAAS,OAAW,MAAK,OAAO,UAAU;AACxD,oBAAc;AAAA,IAChB;AAAA,EACF;AAEA,MAAI,gBAAgB,WAAW,GAAG;AAChC,WAAO;AAAA,MACL,SAAS,QAAQ;AAAA,MACjB,QAAQ;AAAA,MACR,OAAO;AAAA,IAAA;AAAA,EAEX;AAGA,QAAM,cAA4B;AAAA,IAChC,GAAG;AAAA,IACH,OAAO;AAAA,MACL,GAAG,KAAK;AAAA,MACR,GAAI,cAAc,EAAE,KAAK,YAAA,IAAgB,CAAA;AAAA,MACzC,GAAI,cAAc,EAAE,KAAK,gBAAgB,CAAA;AAAA,IAAC;AAAA,EAC5C;AAEF,QAAM,SAAS,UAAU,WAAW;AAEpC,SAAO;AAAA,IACL,SAAS,QAAQ;AAAA,IACjB,QAAQ;AAAA,IACR;AAAA,EAAA;AAEJ;AAKA,eAAsB,kBACpB,UACA,YACA,SACwB;AACxB,QAAM,cAAc,SAAS,eAAe;AAC5C,QAAM,UAAyB,IAAI,MAAM,SAAS,MAAM;AACxD,MAAI,YAAY;AAChB,MAAI,YAAY;AAEhB,iBAAe,SAAwB;AACrC,WAAO,YAAY,SAAS,QAAQ;AAClC,YAAM,QAAQ;AACd,YAAM,UAAU,SAAS,KAAK;AAC9B,UAAI,CAAC,QAAS;AAEd,cAAQ,KAAK,IAAI,MAAM,cAAc,SAAS,YAAY,OAAO;AACjE;AAEA,UAAI,SAAS,YAAY;AACvB,gBAAQ,WAAW;AAAA,UACjB;AAAA,UACA,OAAO,SAAS;AAAA,UAChB,SAAS,QAAQ;AAAA,QAAA,CAClB;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAEA,QAAM,UAAU,MAAM;AAAA,IACpB,EAAE,QAAQ,KAAK,IAAI,aAAa,SAAS,MAAM,EAAA;AAAA,IAC/C,MAAM,OAAA;AAAA,EAAO;AAEf,QAAM,QAAQ,IAAI,OAAO;AAEzB,SAAO;AACT;"}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* PMC XML downloader via E-utilities.
|
|
3
|
-
*/
|
|
4
|
-
export interface PmcXmlResult {
|
|
5
|
-
success: boolean;
|
|
6
|
-
size?: number;
|
|
7
|
-
error?: string;
|
|
8
|
-
}
|
|
9
|
-
/**
|
|
10
|
-
* Download PMC XML for a given PMCID via E-utilities efetch.
|
|
11
|
-
*/
|
|
12
|
-
export declare function downloadPmcXml(pmcid: string, destPath: string): Promise<PmcXmlResult>;
|
|
13
|
-
//# sourceMappingURL=pmc-xml.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pmc-xml.d.ts","sourceRoot":"","sources":["../../../src/fulltext/download/pmc-xml.ts"],"names":[],"mappings":"AAAA;;GAEG;AAuBH,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,OAAO,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,MAAM,EACb,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,YAAY,CAAC,CAoCvB"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pmc-xml.js","sources":["../../../src/fulltext/download/pmc-xml.ts"],"sourcesContent":["/**\n * PMC XML downloader via E-utilities.\n */\n\nimport { writeFile, mkdir } from 'node:fs/promises';\nimport { dirname } from 'node:path';\n\nconst PMC_EFETCH_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi';\n\n/** Content types accepted as valid XML responses */\nconst VALID_XML_TYPES = ['text/xml', 'application/xml'];\n\nconst USER_AGENT = 'search-hub/0.8.0 (https://github.com/ncukondo/search-hub)';\n\nfunction isValidXmlContentType(contentType: string | null): boolean {\n if (!contentType) return false;\n const base = (contentType.split(';')[0] ?? '').trim().toLowerCase();\n return VALID_XML_TYPES.includes(base);\n}\n\n/** Strip \"PMC\" prefix if present, returning numeric ID */\nfunction normalizePmcid(pmcid: string): string {\n return pmcid.replace(/^PMC/i, '');\n}\n\nexport interface PmcXmlResult {\n success: boolean;\n size?: number;\n error?: string;\n}\n\n/**\n * Download PMC XML for a given PMCID via E-utilities efetch.\n */\nexport async function downloadPmcXml(\n pmcid: string,\n destPath: string,\n): Promise<PmcXmlResult> {\n const numericId = normalizePmcid(pmcid);\n const url = `${PMC_EFETCH_URL}?db=pmc&id=${numericId}&rettype=xml`;\n\n try {\n const response = await fetch(url, {\n headers: { 'User-Agent': USER_AGENT },\n });\n\n if (!response.ok) {\n return {\n success: false,\n error: `HTTP ${response.status} ${response.statusText}`,\n };\n }\n\n const contentType = response.headers.get('content-type');\n if (!isValidXmlContentType(contentType)) {\n return {\n success: false,\n error: `Unexpected Content-Type: ${contentType ?? 'none'} (expected XML)`,\n };\n }\n\n const text = await response.text();\n\n await mkdir(dirname(destPath), { recursive: true });\n await writeFile(destPath, text, 'utf-8');\n\n return { success: true, size: Buffer.byteLength(text) };\n } catch (err) {\n return {\n success: false,\n error: err instanceof Error ? err.message : String(err),\n };\n }\n}\n"],"names":[],"mappings":";;AAOA,MAAM,iBAAiB;AAGvB,MAAM,kBAAkB,CAAC,YAAY,iBAAiB;AAEtD,MAAM,aAAa;AAEnB,SAAS,sBAAsB,aAAqC;AAClE,MAAI,CAAC,YAAa,QAAO;AACzB,QAAM,QAAQ,YAAY,MAAM,GAAG,EAAE,CAAC,KAAK,IAAI,KAAA,EAAO,YAAA;AACtD,SAAO,gBAAgB,SAAS,IAAI;AACtC;AAGA,SAAS,eAAe,OAAuB;AAC7C,SAAO,MAAM,QAAQ,SAAS,EAAE;AAClC;AAWA,eAAsB,eACpB,OACA,UACuB;AACvB,QAAM,YAAY,eAAe,KAAK;AACtC,QAAM,MAAM,GAAG,cAAc,cAAc,SAAS;AAEpD,MAAI;AACF,UAAM,WAAW,MAAM,MAAM,KAAK;AAAA,MAChC,SAAS,EAAE,cAAc,WAAA;AAAA,IAAW,CACrC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO,QAAQ,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,MAAA;AAAA,IAEzD;AAEA,UAAM,cAAc,SAAS,QAAQ,IAAI,cAAc;AACvD,QAAI,CAAC,sBAAsB,WAAW,GAAG;AACvC,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO,4BAA4B,eAAe,MAAM;AAAA,MAAA;AAAA,IAE5D;AAEA,UAAM,OAAO,MAAM,SAAS,KAAA;AAE5B,UAAM,MAAM,QAAQ,QAAQ,GAAG,EAAE,WAAW,MAAM;AAClD,UAAM,UAAU,UAAU,MAAM,OAAO;AAEvC,WAAO,EAAE,SAAS,MAAM,MAAM,OAAO,WAAW,IAAI,EAAA;AAAA,EACtD,SAAS,KAAK;AACZ,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,IAAA;AAAA,EAE1D;AACF;"}
|
package/dist/fulltext/meta.d.ts
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import { FulltextMeta, FileInfo } from './types.js';
|
|
2
|
-
export interface CreateMetaOptions {
|
|
3
|
-
citationKey: string;
|
|
4
|
-
uuid: string;
|
|
5
|
-
title: string;
|
|
6
|
-
doi?: string;
|
|
7
|
-
pmid?: string;
|
|
8
|
-
pmcid?: string;
|
|
9
|
-
arxivId?: string;
|
|
10
|
-
authors?: string;
|
|
11
|
-
year?: string;
|
|
12
|
-
}
|
|
13
|
-
/** Create a new FulltextMeta object. */
|
|
14
|
-
export declare function createMeta(options: CreateMetaOptions): FulltextMeta;
|
|
15
|
-
/** Load and parse a meta.json file. */
|
|
16
|
-
export declare function loadMeta(path: string): Promise<FulltextMeta>;
|
|
17
|
-
/** Save a FulltextMeta to a meta.json file with 2-space indentation. */
|
|
18
|
-
export declare function saveMeta(path: string, meta: FulltextMeta): Promise<void>;
|
|
19
|
-
/** Update the files section of a FulltextMeta, preserving existing files. */
|
|
20
|
-
export declare function updateMetaFiles(meta: FulltextMeta, files: {
|
|
21
|
-
pdf?: FileInfo;
|
|
22
|
-
xml?: FileInfo;
|
|
23
|
-
markdown?: FileInfo;
|
|
24
|
-
}): FulltextMeta;
|
|
25
|
-
//# sourceMappingURL=meta.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"meta.d.ts","sourceRoot":"","sources":["../../src/fulltext/meta.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEzD,MAAM,WAAW,iBAAiB;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,wCAAwC;AACxC,wBAAgB,UAAU,CAAC,OAAO,EAAE,iBAAiB,GAAG,YAAY,CAmBnE;AAED,uCAAuC;AACvC,wBAAsB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAGlE;AAED,wEAAwE;AACxE,wBAAsB,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAG9E;AAED,6EAA6E;AAC7E,wBAAgB,eAAe,CAC7B,IAAI,EAAE,YAAY,EAClB,KAAK,EAAE;IAAE,GAAG,CAAC,EAAE,QAAQ,CAAC;IAAC,GAAG,CAAC,EAAE,QAAQ,CAAC;IAAC,QAAQ,CAAC,EAAE,QAAQ,CAAA;CAAE,GAC7D,YAAY,CAUd"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"meta.js","sources":["../../src/fulltext/meta.ts"],"sourcesContent":["/**\n * Meta.json management for fulltext article directories.\n */\n\nimport { readFile, writeFile } from 'node:fs/promises';\nimport type { FulltextMeta, FileInfo } from './types.js';\n\nexport interface CreateMetaOptions {\n citationKey: string;\n uuid: string;\n title: string;\n doi?: string;\n pmid?: string;\n pmcid?: string;\n arxivId?: string;\n authors?: string;\n year?: string;\n}\n\n/** Create a new FulltextMeta object. */\nexport function createMeta(options: CreateMetaOptions): FulltextMeta {\n const uuid8 = options.uuid.slice(0, 8);\n const meta: FulltextMeta = {\n dirName: `${options.citationKey}-${uuid8}`,\n citationKey: options.citationKey,\n uuid: options.uuid,\n title: options.title,\n oaStatus: 'unchecked',\n files: {},\n };\n\n if (options.doi !== undefined) meta.doi = options.doi;\n if (options.pmid !== undefined) meta.pmid = options.pmid;\n if (options.pmcid !== undefined) meta.pmcid = options.pmcid;\n if (options.arxivId !== undefined) meta.arxivId = options.arxivId;\n if (options.authors !== undefined) meta.authors = options.authors;\n if (options.year !== undefined) meta.year = options.year;\n\n return meta;\n}\n\n/** Load and parse a meta.json file. */\nexport async function loadMeta(path: string): Promise<FulltextMeta> {\n const raw = await readFile(path, 'utf-8');\n return JSON.parse(raw) as FulltextMeta;\n}\n\n/** Save a FulltextMeta to a meta.json file with 2-space indentation. */\nexport async function saveMeta(path: string, meta: FulltextMeta): Promise<void> {\n const json = JSON.stringify(meta, null, 2);\n await writeFile(path, json + '\\n', 'utf-8');\n}\n\n/** Update the files section of a FulltextMeta, preserving existing files. */\nexport function updateMetaFiles(\n meta: FulltextMeta,\n files: { pdf?: FileInfo; xml?: FileInfo; markdown?: FileInfo },\n): FulltextMeta {\n return {\n ...meta,\n files: {\n ...meta.files,\n ...Object.fromEntries(\n Object.entries(files).filter(([, v]) => v !== undefined),\n ),\n },\n };\n}\n"],"names":[],"mappings":";AAoBO,SAAS,WAAW,SAA0C;AACnE,QAAM,QAAQ,QAAQ,KAAK,MAAM,GAAG,CAAC;AACrC,QAAM,OAAqB;AAAA,IACzB,SAAS,GAAG,QAAQ,WAAW,IAAI,KAAK;AAAA,IACxC,aAAa,QAAQ;AAAA,IACrB,MAAM,QAAQ;AAAA,IACd,OAAO,QAAQ;AAAA,IACf,UAAU;AAAA,IACV,OAAO,CAAA;AAAA,EAAC;AAGV,MAAI,QAAQ,QAAQ,OAAW,MAAK,MAAM,QAAQ;AAClD,MAAI,QAAQ,SAAS,OAAW,MAAK,OAAO,QAAQ;AACpD,MAAI,QAAQ,UAAU,OAAW,MAAK,QAAQ,QAAQ;AACtD,MAAI,QAAQ,YAAY,OAAW,MAAK,UAAU,QAAQ;AAC1D,MAAI,QAAQ,YAAY,OAAW,MAAK,UAAU,QAAQ;AAC1D,MAAI,QAAQ,SAAS,OAAW,MAAK,OAAO,QAAQ;AAEpD,SAAO;AACT;AAGA,eAAsB,SAAS,MAAqC;AAClE,QAAM,MAAM,MAAM,SAAS,MAAM,OAAO;AACxC,SAAO,KAAK,MAAM,GAAG;AACvB;AAGA,eAAsB,SAAS,MAAc,MAAmC;AAC9E,QAAM,OAAO,KAAK,UAAU,MAAM,MAAM,CAAC;AACzC,QAAM,UAAU,MAAM,OAAO,MAAM,OAAO;AAC5C;AAGO,SAAS,gBACd,MACA,OACc;AACd,SAAO;AAAA,IACL,GAAG;AAAA,IACH,OAAO;AAAA,MACL,GAAG,KAAK;AAAA,MACR,GAAG,OAAO;AAAA,QACR,OAAO,QAAQ,KAAK,EAAE,OAAO,CAAC,GAAG,CAAC,MAAM,MAAM,MAAS;AAAA,MAAA;AAAA,IACzD;AAAA,EACF;AAEJ;"}
|
package/dist/fulltext/paths.d.ts
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Path resolution utilities for fulltext directories.
|
|
3
|
-
*/
|
|
4
|
-
/** Get the fulltext root directory for a session. */
|
|
5
|
-
export declare function getFulltextDir(sessionDir: string): string;
|
|
6
|
-
/** Get an article's fulltext directory. */
|
|
7
|
-
export declare function getArticleDir(sessionDir: string, dirName: string): string;
|
|
8
|
-
/** Get the meta.json path for an article. */
|
|
9
|
-
export declare function getMetaPath(sessionDir: string, dirName: string): string;
|
|
10
|
-
/** Get the README.md path for an article. */
|
|
11
|
-
export declare function getReadmePath(sessionDir: string, dirName: string): string;
|
|
12
|
-
//# sourceMappingURL=paths.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"paths.d.ts","sourceRoot":"","sources":["../../src/fulltext/paths.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,qDAAqD;AACrD,wBAAgB,cAAc,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED,2CAA2C;AAC3C,wBAAgB,aAAa,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAEzE;AAED,6CAA6C;AAC7C,wBAAgB,WAAW,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAEvE;AAED,6CAA6C;AAC7C,wBAAgB,aAAa,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAEzE"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"paths.js","sources":["../../src/fulltext/paths.ts"],"sourcesContent":["/**\n * Path resolution utilities for fulltext directories.\n */\n\nimport { join } from 'node:path';\n\n/** Get the fulltext root directory for a session. */\nexport function getFulltextDir(sessionDir: string): string {\n return join(sessionDir, 'fulltext');\n}\n\n/** Get an article's fulltext directory. */\nexport function getArticleDir(sessionDir: string, dirName: string): string {\n return join(sessionDir, 'fulltext', dirName);\n}\n\n/** Get the meta.json path for an article. */\nexport function getMetaPath(sessionDir: string, dirName: string): string {\n return join(sessionDir, 'fulltext', dirName, 'meta.json');\n}\n\n/** Get the README.md path for an article. */\nexport function getReadmePath(sessionDir: string, dirName: string): string {\n return join(sessionDir, 'fulltext', dirName, 'README.md');\n}\n"],"names":[],"mappings":";AAOO,SAAS,eAAe,YAA4B;AACzD,SAAO,KAAK,YAAY,UAAU;AACpC;AAGO,SAAS,cAAc,YAAoB,SAAyB;AACzE,SAAO,KAAK,YAAY,YAAY,OAAO;AAC7C;AAGO,SAAS,YAAY,YAAoB,SAAyB;AACvE,SAAO,KAAK,YAAY,YAAY,SAAS,WAAW;AAC1D;AAGO,SAAS,cAAc,YAAoB,SAAyB;AACzE,SAAO,KAAK,YAAY,YAAY,SAAS,WAAW;AAC1D;"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"readme.d.ts","sourceRoot":"","sources":["../../src/fulltext/readme.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE/C,gEAAgE;AAChE,wBAAgB,cAAc,CAAC,IAAI,EAAE,YAAY,GAAG,MAAM,CAiEzD"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"readme.js","sources":["../../src/fulltext/readme.ts"],"sourcesContent":["/**\n * README.md template generation for article fulltext directories.\n */\n\nimport type { FulltextMeta } from './types.js';\n\n/** Generate a README.md for an article's fulltext directory. */\nexport function generateReadme(meta: FulltextMeta): string {\n const lines: string[] = [];\n\n // Heading\n lines.push(`# ${meta.citationKey}`);\n lines.push('');\n lines.push(`**Title**: ${meta.title}`);\n\n if (meta.authors) {\n lines.push(`**Authors**: ${meta.authors}`);\n }\n if (meta.year) {\n lines.push(`**Year**: ${meta.year}`);\n }\n\n // Identifiers\n const identifiers: string[] = [];\n if (meta.doi) identifiers.push(`- DOI: ${meta.doi}`);\n if (meta.pmid) identifiers.push(`- PMID: ${meta.pmid}`);\n if (meta.pmcid) identifiers.push(`- PMC: ${meta.pmcid}`);\n if (meta.arxivId) identifiers.push(`- arXiv: ${meta.arxivId}`);\n\n if (identifiers.length > 0) {\n lines.push('');\n lines.push('## Identifiers');\n lines.push('');\n lines.push(...identifiers);\n }\n\n // Download URLs\n const urls: string[] = [];\n if (meta.doi) {\n urls.push(`- Publisher: https://doi.org/${meta.doi}`);\n }\n if (meta.pmcid) {\n const pmcNum = meta.pmcid.replace(/^PMC/i, '');\n urls.push(`- PMC PDF: https://www.ncbi.nlm.nih.gov/pmc/articles/${meta.pmcid}/pdf/`);\n urls.push(`- PMC XML: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pmc&id=${pmcNum}`);\n }\n if (meta.arxivId) {\n urls.push(`- arXiv PDF: https://arxiv.org/pdf/${meta.arxivId}.pdf`);\n }\n\n if (urls.length > 0) {\n lines.push('');\n lines.push('## Download URLs');\n lines.push('');\n lines.push(...urls);\n }\n\n // Instructions\n lines.push('');\n lines.push('## Instructions');\n lines.push('');\n lines.push('Place fulltext files in this directory:');\n lines.push('- `fulltext.pdf` - PDF version');\n lines.push('- `fulltext.md` - Markdown version (optional)');\n lines.push('');\n lines.push('After adding files, run:');\n lines.push('```');\n lines.push('search-hub fulltext sync <session-id>');\n lines.push('```');\n lines.push('');\n\n return lines.join('\\n');\n}\n"],"names":[],"mappings":"AAOO,SAAS,eAAe,MAA4B;AACzD,QAAM,QAAkB,CAAA;AAGxB,QAAM,KAAK,KAAK,KAAK,WAAW,EAAE;AAClC,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,cAAc,KAAK,KAAK,EAAE;AAErC,MAAI,KAAK,SAAS;AAChB,UAAM,KAAK,gBAAgB,KAAK,OAAO,EAAE;AAAA,EAC3C;AACA,MAAI,KAAK,MAAM;AACb,UAAM,KAAK,aAAa,KAAK,IAAI,EAAE;AAAA,EACrC;AAGA,QAAM,cAAwB,CAAA;AAC9B,MAAI,KAAK,IAAK,aAAY,KAAK,UAAU,KAAK,GAAG,EAAE;AACnD,MAAI,KAAK,KAAM,aAAY,KAAK,WAAW,KAAK,IAAI,EAAE;AACtD,MAAI,KAAK,MAAO,aAAY,KAAK,UAAU,KAAK,KAAK,EAAE;AACvD,MAAI,KAAK,QAAS,aAAY,KAAK,YAAY,KAAK,OAAO,EAAE;AAE7D,MAAI,YAAY,SAAS,GAAG;AAC1B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,gBAAgB;AAC3B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,GAAG,WAAW;AAAA,EAC3B;AAGA,QAAM,OAAiB,CAAA;AACvB,MAAI,KAAK,KAAK;AACZ,SAAK,KAAK,gCAAgC,KAAK,GAAG,EAAE;AAAA,EACtD;AACA,MAAI,KAAK,OAAO;AACd,UAAM,SAAS,KAAK,MAAM,QAAQ,SAAS,EAAE;AAC7C,SAAK,KAAK,wDAAwD,KAAK,KAAK,OAAO;AACnF,SAAK,KAAK,kFAAkF,MAAM,EAAE;AAAA,EACtG;AACA,MAAI,KAAK,SAAS;AAChB,SAAK,KAAK,sCAAsC,KAAK,OAAO,MAAM;AAAA,EACpE;AAEA,MAAI,KAAK,SAAS,GAAG;AACnB,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,kBAAkB;AAC7B,UAAM,KAAK,EAAE;AACb,UAAM,KAAK,GAAG,IAAI;AAAA,EACpB;AAGA,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,iBAAiB;AAC5B,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,yCAAyC;AACpD,QAAM,KAAK,gCAAgC;AAC3C,QAAM,KAAK,+CAA+C;AAC1D,QAAM,KAAK,EAAE;AACb,QAAM,KAAK,0BAA0B;AACrC,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,uCAAuC;AAClD,QAAM,KAAK,KAAK;AAChB,QAAM,KAAK,EAAE;AAEb,SAAO,MAAM,KAAK,IAAI;AACxB;"}
|
package/dist/fulltext/types.d.ts
DELETED
|
@@ -1,90 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Fulltext management type definitions.
|
|
3
|
-
* Defines interfaces for article fulltext storage, metadata, and indexing.
|
|
4
|
-
*/
|
|
5
|
-
/**
|
|
6
|
-
* Information about a retrieved or manually added file.
|
|
7
|
-
*/
|
|
8
|
-
export interface FileInfo {
|
|
9
|
-
/** Fixed filename: "fulltext.pdf", "fulltext.xml", or "fulltext.md" */
|
|
10
|
-
filename: string;
|
|
11
|
-
/** How the file was obtained: "pmc", "arxiv", "unpaywall", "manual", etc. */
|
|
12
|
-
source: string;
|
|
13
|
-
/** ISO 8601 timestamp when the file was retrieved/added */
|
|
14
|
-
retrievedAt: string;
|
|
15
|
-
/** File size in bytes */
|
|
16
|
-
size?: number;
|
|
17
|
-
/** For markdown: source file it was converted from (e.g., "fulltext.xml") */
|
|
18
|
-
convertedFrom?: string;
|
|
19
|
-
}
|
|
20
|
-
/**
|
|
21
|
-
* An Open Access location discovered for an article.
|
|
22
|
-
*/
|
|
23
|
-
export interface OALocation {
|
|
24
|
-
/** Discovery source */
|
|
25
|
-
source: 'unpaywall' | 'pmc' | 'arxiv' | 'core' | 'publisher';
|
|
26
|
-
/** URL to the fulltext */
|
|
27
|
-
url: string;
|
|
28
|
-
/** Type of content at the URL */
|
|
29
|
-
urlType: 'pdf' | 'xml' | 'html' | 'repository';
|
|
30
|
-
/** Version of the article */
|
|
31
|
-
version: 'published' | 'accepted' | 'submitted';
|
|
32
|
-
/** License identifier (e.g., "cc-by") */
|
|
33
|
-
license?: string;
|
|
34
|
-
}
|
|
35
|
-
/**
|
|
36
|
-
* OA status of an article.
|
|
37
|
-
*/
|
|
38
|
-
export type OAStatus = 'open' | 'closed' | 'unknown' | 'unchecked';
|
|
39
|
-
/**
|
|
40
|
-
* Metadata for a single article's fulltext directory (meta.json).
|
|
41
|
-
*/
|
|
42
|
-
export interface FulltextMeta {
|
|
43
|
-
/** Directory name: "{citationKey}-{uuid8}" */
|
|
44
|
-
dirName: string;
|
|
45
|
-
/** Citation key: e.g., "smith2024" */
|
|
46
|
-
citationKey: string;
|
|
47
|
-
/** Full UUID for uniqueness */
|
|
48
|
-
uuid: string;
|
|
49
|
-
doi?: string;
|
|
50
|
-
pmid?: string;
|
|
51
|
-
pmcid?: string;
|
|
52
|
-
arxivId?: string;
|
|
53
|
-
/** Article title */
|
|
54
|
-
title: string;
|
|
55
|
-
/** Authors as a display string */
|
|
56
|
-
authors?: string;
|
|
57
|
-
/** Publication year */
|
|
58
|
-
year?: string;
|
|
59
|
-
/** Current OA status */
|
|
60
|
-
oaStatus: OAStatus;
|
|
61
|
-
/** Discovered OA locations */
|
|
62
|
-
oaLocations?: OALocation[];
|
|
63
|
-
/** ISO 8601 timestamp of last OA check */
|
|
64
|
-
checkedAt?: string;
|
|
65
|
-
files: {
|
|
66
|
-
pdf?: FileInfo;
|
|
67
|
-
xml?: FileInfo;
|
|
68
|
-
markdown?: FileInfo;
|
|
69
|
-
};
|
|
70
|
-
pendingDownload?: {
|
|
71
|
-
suggestedUrls: string[];
|
|
72
|
-
/** ISO 8601 timestamp */
|
|
73
|
-
addedAt: string;
|
|
74
|
-
};
|
|
75
|
-
}
|
|
76
|
-
/**
|
|
77
|
-
* Fulltext reference stored in ArticleEntry (reviews.yaml extension).
|
|
78
|
-
* Links an article in reviews.yaml to its fulltext directory.
|
|
79
|
-
*/
|
|
80
|
-
export interface ArticleFulltextRef {
|
|
81
|
-
/** Directory name: "{citationKey}-{uuid8}" — links to fulltext/<dirName>/ */
|
|
82
|
-
dirName: string;
|
|
83
|
-
/** Which file types are available */
|
|
84
|
-
hasFiles: {
|
|
85
|
-
pdf: boolean;
|
|
86
|
-
xml: boolean;
|
|
87
|
-
markdown: boolean;
|
|
88
|
-
};
|
|
89
|
-
}
|
|
90
|
-
//# sourceMappingURL=types.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/fulltext/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,uEAAuE;IACvE,QAAQ,EAAE,MAAM,CAAC;IACjB,6EAA6E;IAC7E,MAAM,EAAE,MAAM,CAAC;IACf,2DAA2D;IAC3D,WAAW,EAAE,MAAM,CAAC;IACpB,yBAAyB;IACzB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,6EAA6E;IAC7E,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,uBAAuB;IACvB,MAAM,EAAE,WAAW,GAAG,KAAK,GAAG,OAAO,GAAG,MAAM,GAAG,WAAW,CAAC;IAC7D,0BAA0B;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,iCAAiC;IACjC,OAAO,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,YAAY,CAAC;IAC/C,6BAA6B;IAC7B,OAAO,EAAE,WAAW,GAAG,UAAU,GAAG,WAAW,CAAC;IAChD,yCAAyC;IACzC,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAAG,MAAM,GAAG,QAAQ,GAAG,SAAS,GAAG,WAAW,CAAC;AAEnE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,8CAA8C;IAC9C,OAAO,EAAE,MAAM,CAAC;IAChB,sCAAsC;IACtC,WAAW,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,IAAI,EAAE,MAAM,CAAC;IAGb,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IAGjB,oBAAoB;IACpB,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uBAAuB;IACvB,IAAI,CAAC,EAAE,MAAM,CAAC;IAGd,wBAAwB;IACxB,QAAQ,EAAE,QAAQ,CAAC;IACnB,8BAA8B;IAC9B,WAAW,CAAC,EAAE,UAAU,EAAE,CAAC;IAC3B,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IAGnB,KAAK,EAAE;QACL,GAAG,CAAC,EAAE,QAAQ,CAAC;QACf,GAAG,CAAC,EAAE,QAAQ,CAAC;QACf,QAAQ,CAAC,EAAE,QAAQ,CAAC;KACrB,CAAC;IAGF,eAAe,CAAC,EAAE;QAChB,aAAa,EAAE,MAAM,EAAE,CAAC;QACxB,yBAAyB;QACzB,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED;;;GAGG;AACH,MAAM,WAAW,kBAAkB;IACjC,6EAA6E;IAC7E,OAAO,EAAE,MAAM,CAAC;IAChB,qCAAqC;IACrC,QAAQ,EAAE;QACR,GAAG,EAAE,OAAO,CAAC;QACb,GAAG,EAAE,OAAO,CAAC;QACb,QAAQ,EAAE,OAAO,CAAC;KACnB,CAAC;CACH"}
|
|
File without changes
|
|
File without changes
|