@pdfvector/instance-contract 0.0.46 → 0.0.48

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  export * from "./fetch.js";
2
2
  export * from "./find-citations.js";
3
3
  export * from "./paper-graph.js";
4
+ export * from "./parse.js";
4
5
  export * from "./search.js";
5
6
  export * from "./search-grants.js";
6
7
  export * from "./similar-papers.js";
@@ -1,6 +1,7 @@
1
1
  export * from "./fetch.js";
2
2
  export * from "./find-citations.js";
3
3
  export * from "./paper-graph.js";
4
+ export * from "./parse.js";
4
5
  export * from "./search.js";
5
6
  export * from "./search-grants.js";
6
7
  export * from "./similar-papers.js";
@@ -0,0 +1,44 @@
1
+ import { z } from "zod";
2
+ export declare const parse: import("@orpc/contract").ContractProcedureBuilderWithInputOutput<z.ZodObject<{
3
+ id: z.ZodOptional<z.ZodString>;
4
+ url: z.ZodOptional<z.ZodURL>;
5
+ model: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
6
+ pro: "pro";
7
+ nano: "nano";
8
+ mini: "mini";
9
+ max: "max";
10
+ auto: "auto";
11
+ }>>>;
12
+ callback: z.ZodOptional<z.ZodObject<{
13
+ url: z.ZodURL;
14
+ type: z.ZodOptional<z.ZodString>;
15
+ }, z.core.$strip>>;
16
+ }, z.core.$strip>, z.ZodObject<{
17
+ id: z.ZodString;
18
+ title: z.ZodOptional<z.ZodNullable<z.ZodString>>;
19
+ doi: z.ZodOptional<z.ZodNullable<z.ZodString>>;
20
+ url: z.ZodOptional<z.ZodNullable<z.ZodString>>;
21
+ providerURL: z.ZodOptional<z.ZodNullable<z.ZodString>>;
22
+ pdfURL: z.ZodString;
23
+ detectedProvider: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
24
+ "semantic-scholar": "semantic-scholar";
25
+ pubmed: "pubmed";
26
+ arxiv: "arxiv";
27
+ "google-scholar": "google-scholar";
28
+ eric: "eric";
29
+ "europe-pmc": "europe-pmc";
30
+ openalex: "openalex";
31
+ crossref: "crossref";
32
+ }>>>;
33
+ markdown: z.ZodString;
34
+ pageCount: z.ZodNumber;
35
+ model: z.ZodEnum<{
36
+ pro: "pro";
37
+ nano: "nano";
38
+ mini: "mini";
39
+ max: "max";
40
+ }>;
41
+ credits: z.ZodNumber;
42
+ requestId: z.ZodNumber;
43
+ html: z.ZodOptional<z.ZodString>;
44
+ }, z.core.$strip>, Record<never, never>, Record<never, never>>;
@@ -0,0 +1,126 @@
1
+ import { oc } from "@orpc/contract";
2
+ import { z } from "zod";
3
+ import { fetchableUrlSchema } from "../../fetchable-url-schema.js";
4
+ import { pdfvectorModelSchema } from "../../pdfvector-model-schema.js";
5
+ import { documentParseModelDescription, outputModelDescription, } from "../../supported-mimes.js";
6
+ import { providerSchema } from "./provider.js";
7
+ const callbackSchema = z
8
+ .object({
9
+ url: fetchableUrlSchema.describe("Webhook URL where results will be POSTed when processing completes"),
10
+ type: z
11
+ .string()
12
+ .optional()
13
+ .describe("Callback type identifier (e.g. 'zapier')"),
14
+ })
15
+ .optional()
16
+ .describe("Optional webhook callback for async processing. " +
17
+ "When provided, the server returns 202 immediately and POSTs the full response payload to the callback URL when processing completes. " +
18
+ "On error, the callback receives a POST with X-Pdfvector-Callback-Failed: true header and error details in the body. " +
19
+ "Useful for long-running operations that may exceed client timeout limits.");
20
+ const parseInputSchema = z
21
+ .object({
22
+ id: z
23
+ .string({ message: "id must be a string" })
24
+ .trim()
25
+ .min(1, "id must be a non-empty string")
26
+ .optional()
27
+ .describe("Publication identifier to resolve and parse. Supports DOI, PubMed ID, ArXiv ID, Semantic Scholar ID, ERIC ID, Europe PMC ID, OpenAlex ID."),
28
+ url: fetchableUrlSchema
29
+ .optional()
30
+ .describe("Publication URL to resolve and parse. Supports known provider URLs such as DOI, ArXiv, PubMed, Semantic Scholar, ERIC, Europe PMC, OpenAlex, Springer, Nature, bioRxiv, and medRxiv."),
31
+ model: z
32
+ .enum(["auto", ...pdfvectorModelSchema.options])
33
+ .optional()
34
+ .default("auto")
35
+ .describe(documentParseModelDescription),
36
+ callback: callbackSchema,
37
+ })
38
+ .superRefine((input, ctx) => {
39
+ const hasId = typeof input.id === "string" && input.id.length > 0;
40
+ const hasUrl = typeof input.url === "string" && input.url.length > 0;
41
+ if (hasId === hasUrl) {
42
+ ctx.addIssue({
43
+ code: "custom",
44
+ path: ["id"],
45
+ message: "Provide exactly one of id or url",
46
+ });
47
+ }
48
+ });
49
+ const parseOutputSchema = z.object({
50
+ id: z.string().describe("Original input identifier or URL"),
51
+ title: z.string().nullish().describe("Publication title, if available"),
52
+ doi: z.string().nullish().describe("Publication DOI, if available"),
53
+ url: z
54
+ .string()
55
+ .nullish()
56
+ .describe("PDF Vector publication URL, if available"),
57
+ providerURL: z
58
+ .string()
59
+ .nullish()
60
+ .describe("Provider publication page URL, if available"),
61
+ pdfURL: z.string().describe("Resolved public PDF URL used for parsing"),
62
+ detectedProvider: providerSchema
63
+ .nullish()
64
+ .describe("Provider detected from the input identifier or URL"),
65
+ markdown: z
66
+ .string()
67
+ .describe("Extracted markdown content from the paper PDF"),
68
+ pageCount: z
69
+ .number()
70
+ .int()
71
+ .describe("Total number of pages in the paper PDF"),
72
+ model: pdfvectorModelSchema.describe(outputModelDescription),
73
+ credits: z
74
+ .number()
75
+ .int()
76
+ .describe("Number of credits consumed by this API call. Cost per page: nano=1, mini=2, pro=4, max=8."),
77
+ requestId: z
78
+ .number()
79
+ .int()
80
+ .describe("Unique request identifier for this API call"),
81
+ html: z
82
+ .string()
83
+ .optional()
84
+ .describe("Full HTML representation of the paper content. Only available when using the 'max' model."),
85
+ });
86
+ const requestExamples = {
87
+ "Parse ArXiv paper by ID": {
88
+ summary: "Parse ArXiv paper by ID",
89
+ value: {
90
+ id: "1706.03762",
91
+ model: "auto",
92
+ },
93
+ },
94
+ "Parse paper by DOI URL": {
95
+ summary: "Parse paper by DOI URL",
96
+ value: {
97
+ url: "https://doi.org/10.1038/nature12373",
98
+ model: "pro",
99
+ },
100
+ },
101
+ "Parse PubMed paper by ID": {
102
+ summary: "Parse PubMed paper by ID",
103
+ value: {
104
+ id: "33116299",
105
+ model: "auto",
106
+ },
107
+ },
108
+ };
109
+ export const parse = oc
110
+ .input(parseInputSchema)
111
+ .output(parseOutputSchema)
112
+ .route({
113
+ summary: "Parse an academic paper to markdown",
114
+ description: "Resolve an academic paper URL or identifier to a public PDF URL, parse the PDF, and return markdown using the same model tiers as document.parse. Returns NOT_FOUND when the paper cannot be found or no public PDF is available.",
115
+ tags: ["Academic"],
116
+ spec: (op) => {
117
+ op.security = [{ bearerAuth: [] }];
118
+ const reqBody = op.requestBody;
119
+ if (reqBody?.content) {
120
+ for (const mediaType of Object.values(reqBody.content)) {
121
+ mediaType.examples = requestExamples;
122
+ }
123
+ }
124
+ return op;
125
+ },
126
+ });
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # @pdfvector/instance-contract
2
2
 
3
+ ## 0.0.48
4
+ ### Patch Changes
5
+
6
+
7
+
8
+ - [#250](https://github.com/phuctm97/pdfvector/pull/250) [`a2e6883`](https://github.com/phuctm97/pdfvector/commit/a2e68833d9f0dd6b38ea5b4b2a91aefb9f13aaf8) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Upgrade workspace dependencies and adapt SPA navigation state typing for React Router 7.15.1.
9
+
10
+ ## 0.0.47
11
+ ### Patch Changes
12
+
13
+
14
+
15
+ - [#240](https://github.com/phuctm97/pdfvector/pull/240) [`2c8691c`](https://github.com/phuctm97/pdfvector/commit/2c8691c9bbd251ff7b7a153fd4254d9360c11c08) Thanks [@khanhduyvt0101](https://github.com/khanhduyvt0101)! - Add academic.parse to resolve academic paper IDs or provider URLs to public PDFs and parse them to markdown.
16
+
3
17
  ## 0.0.46
4
18
  ### Patch Changes
5
19
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pdfvector/instance-contract",
3
- "version": "0.0.46",
3
+ "version": "0.0.48",
4
4
  "type": "module",
5
5
  "description": "API contract definitions for PDF Vector instance server",
6
6
  "license": "MIT",
@@ -20,8 +20,8 @@
20
20
  },
21
21
  "main": ".tsc/lib/index.js",
22
22
  "dependencies": {
23
- "@orpc/client": "^1.14.2",
24
- "@orpc/contract": "^1.14.2",
23
+ "@orpc/client": "^1.14.3",
24
+ "@orpc/contract": "^1.14.3",
25
25
  "@pdfvector/util": "0.0.22"
26
26
  },
27
27
  "peerDependencies": {