@ncukondo/search-hub 0.12.2 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/dist/cli/commands/diff.js +2 -2
  2. package/dist/cli/commands/diff.js.map +1 -1
  3. package/dist/cli/commands/query/init.d.ts +5 -0
  4. package/dist/cli/commands/query/init.d.ts.map +1 -1
  5. package/dist/cli/commands/query/init.js +9 -1
  6. package/dist/cli/commands/query/init.js.map +1 -1
  7. package/dist/cli/commands/query/translate.d.ts.map +1 -1
  8. package/dist/cli/commands/query/translate.js +5 -0
  9. package/dist/cli/commands/query/translate.js.map +1 -1
  10. package/dist/cli/commands/query/validate.d.ts +22 -1
  11. package/dist/cli/commands/query/validate.d.ts.map +1 -1
  12. package/dist/cli/commands/query/validate.js +65 -22
  13. package/dist/cli/commands/query/validate.js.map +1 -1
  14. package/dist/cli/commands/review/extract.d.ts.map +1 -1
  15. package/dist/cli/commands/review/extract.js +1 -2
  16. package/dist/cli/commands/review/extract.js.map +1 -1
  17. package/dist/cli/commands/review/finalize.d.ts.map +1 -1
  18. package/dist/cli/commands/review/finalize.js +1 -2
  19. package/dist/cli/commands/review/finalize.js.map +1 -1
  20. package/dist/cli/commands/review/init.d.ts.map +1 -1
  21. package/dist/cli/commands/review/init.js +2 -5
  22. package/dist/cli/commands/review/init.js.map +1 -1
  23. package/dist/cli/commands/review/merge.d.ts.map +1 -1
  24. package/dist/cli/commands/review/merge.js +1 -2
  25. package/dist/cli/commands/review/merge.js.map +1 -1
  26. package/dist/cli/index.d.ts.map +1 -1
  27. package/dist/cli/index.js +81 -7
  28. package/dist/cli/index.js.map +1 -1
  29. package/dist/cli/suggestions/index.d.ts.map +1 -1
  30. package/dist/cli/suggestions/index.js +10 -0
  31. package/dist/cli/suggestions/index.js.map +1 -1
  32. package/dist/cli/suggestions/rules.d.ts.map +1 -1
  33. package/dist/cli/suggestions/rules.js +21 -8
  34. package/dist/cli/suggestions/rules.js.map +1 -1
  35. package/dist/cli/suggestions/types.d.ts +11 -0
  36. package/dist/cli/suggestions/types.d.ts.map +1 -1
  37. package/dist/index.js +5 -0
  38. package/dist/index.js.map +1 -1
  39. package/dist/providers/arxiv/translator.d.ts.map +1 -1
  40. package/dist/providers/arxiv/translator.js +5 -2
  41. package/dist/providers/arxiv/translator.js.map +1 -1
  42. package/dist/providers/base/types.d.ts +2 -0
  43. package/dist/providers/base/types.d.ts.map +1 -1
  44. package/dist/providers/base/types.js.map +1 -1
  45. package/dist/providers/base/warnings.d.ts +14 -0
  46. package/dist/providers/base/warnings.d.ts.map +1 -0
  47. package/dist/providers/base/warnings.js +33 -0
  48. package/dist/providers/base/warnings.js.map +1 -0
  49. package/dist/providers/eric/translator.d.ts.map +1 -1
  50. package/dist/providers/eric/translator.js +5 -2
  51. package/dist/providers/eric/translator.js.map +1 -1
  52. package/dist/providers/pubmed/translator.d.ts.map +1 -1
  53. package/dist/providers/pubmed/translator.js +5 -2
  54. package/dist/providers/pubmed/translator.js.map +1 -1
  55. package/dist/providers/scopus/translator.d.ts.map +1 -1
  56. package/dist/providers/scopus/translator.js +22 -5
  57. package/dist/providers/scopus/translator.js.map +1 -1
  58. package/dist/query/__test-helpers__/mock-mesh-client.d.ts +12 -0
  59. package/dist/query/__test-helpers__/mock-mesh-client.d.ts.map +1 -0
  60. package/dist/query/index.d.ts +4 -0
  61. package/dist/query/index.d.ts.map +1 -1
  62. package/dist/query/json-schema.d.ts +3 -0
  63. package/dist/query/json-schema.d.ts.map +1 -0
  64. package/dist/query/json-schema.js +48 -0
  65. package/dist/query/json-schema.js.map +1 -0
  66. package/dist/query/mesh-lookup.d.ts +47 -0
  67. package/dist/query/mesh-lookup.d.ts.map +1 -0
  68. package/dist/query/mesh-lookup.js +151 -0
  69. package/dist/query/mesh-lookup.js.map +1 -0
  70. package/dist/query/parser.js +1 -1
  71. package/dist/query/parser.js.map +1 -1
  72. package/dist/query/types.d.ts +2 -2
  73. package/dist/query/types.d.ts.map +1 -1
  74. package/dist/query/validator.d.ts +5 -5
  75. package/dist/query/validator.d.ts.map +1 -1
  76. package/dist/query/validator.js +5 -2
  77. package/dist/query/validator.js.map +1 -1
  78. package/dist/query/vocab-cache.d.ts +15 -0
  79. package/dist/query/vocab-cache.d.ts.map +1 -0
  80. package/dist/query/vocab-cache.js +44 -0
  81. package/dist/query/vocab-cache.js.map +1 -0
  82. package/dist/query/vocab-validator.d.ts +71 -0
  83. package/dist/query/vocab-validator.d.ts.map +1 -0
  84. package/dist/query/vocab-validator.js +153 -0
  85. package/dist/query/vocab-validator.js.map +1 -0
  86. package/dist/utils/levenshtein.d.ts +6 -0
  87. package/dist/utils/levenshtein.d.ts.map +1 -0
  88. package/dist/utils/levenshtein.js +21 -0
  89. package/dist/utils/levenshtein.js.map +1 -0
  90. package/package.json +1 -1
@@ -1 +1 @@
1
- {"version":3,"file":"validator.js","sources":["../../src/query/validator.ts"],"sourcesContent":["/**\n * Query Schema Validation using Zod\n *\n * Validates query YAML structure against the query DSL schema.\n * See spec/models/query-dsl.md for the full specification.\n */\n\nimport { z } from 'zod';\nimport type { QueryAST } from './types.js';\n\n/**\n * Schema for field types.\n */\nexport const fieldTypeSchema = z.enum([\n 'title',\n 'abstract',\n 'title_abstract',\n 'author',\n 'keyword',\n 'all',\n]);\n\n/**\n * Schema for term block containing search terms.\n */\nexport const termBlockSchema = z.object({\n keywords: z.array(z.string()).min(1),\n mesh: z.array(z.string()).optional(),\n emtree: z.array(z.string()).optional(),\n eric: z.array(z.string()).optional(),\n exclude: z.array(z.string()).optional(),\n});\n\n/**\n * Schema for operator.\n */\nexport const operatorSchema = z.enum(['AND', 'OR']);\n\n/**\n * Schema for query block.\n */\nexport const queryBlockSchema = z.object({\n field: fieldTypeSchema,\n terms: termBlockSchema,\n operator: operatorSchema,\n});\n\n/**\n * Schema for publication type filter.\n */\nexport const publicationTypeFilterSchema = z.object({\n include: z.array(z.string()).optional(),\n exclude: z.array(z.string()).optional(),\n});\n\n/**\n * Schema for filters (YAML input format with snake_case).\n * Transforms to camelCase for internal use.\n */\nexport const filtersSchema = z\n .object({\n year_from: z.number().int().optional(),\n year_to: z.number().int().optional(),\n language: z.array(z.string()).optional(),\n publication_types: publicationTypeFilterSchema.optional(),\n })\n .optional()\n .default({})\n .transform((data) => ({\n yearFrom: data.year_from,\n yearTo: data.year_to,\n languages: data.language,\n publicationTypes: data.publication_types,\n }));\n\n/**\n * Schema for override block (YAML input format).\n */\nexport const overrideBlockSchema = z\n .object({\n filters: z\n .object({\n year_from: 
z.number().int().optional(),\n year_to: z.number().int().optional(),\n language: z.array(z.string()).optional(),\n publication_types: publicationTypeFilterSchema.optional(),\n })\n .optional(),\n categories: z.array(z.string()).optional(),\n source_types: z.array(z.string()).optional(),\n })\n .transform((data) => ({\n filters: data.filters\n ? {\n yearFrom: data.filters.year_from,\n yearTo: data.filters.year_to,\n languages: data.filters.language,\n publicationTypes: data.filters.publication_types,\n }\n : undefined,\n categories: data.categories,\n sourceTypes: data.source_types,\n }));\n\n/**\n * Provider names schema.\n */\nexport const providerNameSchema = z.enum([\n 'pubmed',\n 'scopus',\n 'eric',\n 'arxiv',\n 'wos',\n 'embase',\n]);\n\n/**\n * Schema for overrides object (partial record of provider -> override).\n */\nconst overridesSchema = z\n .object({\n pubmed: overrideBlockSchema.optional(),\n scopus: overrideBlockSchema.optional(),\n eric: overrideBlockSchema.optional(),\n arxiv: overrideBlockSchema.optional(),\n wos: overrideBlockSchema.optional(),\n embase: overrideBlockSchema.optional(),\n })\n .optional()\n .default({});\n\n/**\n * Schema for the complete query file (YAML input format).\n * Transforms to QueryAST for internal use.\n */\nexport const queryFileSchema = z\n .object({\n name: z.string().min(1),\n description: z.string().optional(),\n query: z.array(queryBlockSchema).min(1),\n filters: filtersSchema,\n overrides: overridesSchema,\n })\n .transform((data) => ({\n name: data.name,\n description: data.description,\n blocks: data.query,\n filters: data.filters,\n overrides: data.overrides,\n }));\n\n/**\n * Validate a parsed YAML object against the query schema.\n * Returns a validated QueryAST.\n */\nexport function validateQueryFile(data: unknown): QueryAST {\n return queryFileSchema.parse(data);\n}\n\n/**\n * Validation error with path information.\n */\nexport class ValidationError extends Error {\n constructor(\n public readonly path: 
string,\n message: string\n ) {\n super(message);\n this.name = 'ValidationError';\n }\n}\n\n/**\n * Format Zod validation errors into an array of ValidationError objects.\n *\n * @param data - The data to validate\n * @returns Array of ValidationError objects (empty if valid)\n */\nexport function formatValidationErrors(data: unknown): ValidationError[] {\n const result = queryFileSchema.safeParse(data);\n\n if (result.success) {\n return [];\n }\n\n return result.error.issues.map((issue) => {\n const path = issue.path.join('.');\n return new ValidationError(path, issue.message);\n });\n}\n"],"names":[],"mappings":";AAaO,MAAM,kBAAkB,EAAE,KAAK;AAAA,EACpC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAKM,MAAM,kBAAkB,EAAE,OAAO;AAAA,EACtC,UAAU,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,CAAC;AAAA,EACnC,MAAM,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC1B,QAAQ,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC5B,MAAM,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC1B,SAAS,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAC/B,CAAC;AAKM,MAAM,iBAAiB,EAAE,KAAK,CAAC,OAAO,IAAI,CAAC;AAK3C,MAAM,mBAAmB,EAAE,OAAO;AAAA,EACvC,OAAO;AAAA,EACP,OAAO;AAAA,EACP,UAAU;AACZ,CAAC;AAKM,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,SAAS,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC7B,SAAS,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAC/B,CAAC;AAMM,MAAM,gBAAgB,EAC1B,OAAO;AAAA,EACN,WAAW,EAAE,OAAA,EAAS,IAAA,EAAM,SAAA;AAAA,EAC5B,SAAS,EAAE,OAAA,EAAS,IAAA,EAAM,SAAA;AAAA,EAC1B,UAAU,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC9B,mBAAmB,4BAA4B,SAAA;AACjD,CAAC,EACA,WACA,QAAQ,CAAA,CAAE,EACV,UAAU,CAAC,UAAU;AAAA,EACpB,UAAU,KAAK;AAAA,EACf,QAAQ,KAAK;AAAA,EACb,WAAW,KAAK;AAAA,EAChB,kBAAkB,KAAK;AACzB,EAAE;AAKG,MAAM,sBAAsB,EAChC,OAAO;AAAA,EACN,SAAS,EACN,OAAO;AAAA,IACN,WAAW,EAAE,OAAA,EAAS,IAAA,EAAM,SAAA;AAAA,IAC5B,SAAS,EAAE,OAAA,EAAS,IAAA,EAAM,SAAA;AAAA,IAC1B,UAAU,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,IAC9B,mBAAmB,4BAA4B,SAAA;AAAA,EAAS,CACzD,EACA,SAAA;AAAA,EACH,YAAY,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAChC,cAAc,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AACpC,CAAC
,EACA,UAAU,CAAC,UAAU;AAAA,EACpB,SAAS,KAAK,UACV;AAAA,IACE,UAAU,KAAK,QAAQ;AAAA,IACvB,QAAQ,KAAK,QAAQ;AAAA,IACrB,WAAW,KAAK,QAAQ;AAAA,IACxB,kBAAkB,KAAK,QAAQ;AAAA,EAAA,IAEjC;AAAA,EACJ,YAAY,KAAK;AAAA,EACjB,aAAa,KAAK;AACpB,EAAE;AAK8B,EAAE,KAAK;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAKD,MAAM,kBAAkB,EACrB,OAAO;AAAA,EACN,QAAQ,oBAAoB,SAAA;AAAA,EAC5B,QAAQ,oBAAoB,SAAA;AAAA,EAC5B,MAAM,oBAAoB,SAAA;AAAA,EAC1B,OAAO,oBAAoB,SAAA;AAAA,EAC3B,KAAK,oBAAoB,SAAA;AAAA,EACzB,QAAQ,oBAAoB,SAAA;AAC9B,CAAC,EACA,SAAA,EACA,QAAQ,EAAE;AAMN,MAAM,kBAAkB,EAC5B,OAAO;AAAA,EACN,MAAM,EAAE,SAAS,IAAI,CAAC;AAAA,EACtB,aAAa,EAAE,OAAA,EAAS,SAAA;AAAA,EACxB,OAAO,EAAE,MAAM,gBAAgB,EAAE,IAAI,CAAC;AAAA,EACtC,SAAS;AAAA,EACT,WAAW;AACb,CAAC,EACA,UAAU,CAAC,UAAU;AAAA,EACpB,MAAM,KAAK;AAAA,EACX,aAAa,KAAK;AAAA,EAClB,QAAQ,KAAK;AAAA,EACb,SAAS,KAAK;AAAA,EACd,WAAW,KAAK;AAClB,EAAE;AAMG,SAAS,kBAAkB,MAAyB;AACzD,SAAO,gBAAgB,MAAM,IAAI;AACnC;AAKO,MAAM,wBAAwB,MAAM;AAAA,EACzC,YACkB,MAChB,SACA;AACA,UAAM,OAAO;AAHG,SAAA,OAAA;AAIhB,SAAK,OAAO;AAAA,EACd;AACF;AAQO,SAAS,uBAAuB,MAAkC;AACvE,QAAM,SAAS,gBAAgB,UAAU,IAAI;AAE7C,MAAI,OAAO,SAAS;AAClB,WAAO,CAAA;AAAA,EACT;AAEA,SAAO,OAAO,MAAM,OAAO,IAAI,CAAC,UAAU;AACxC,UAAM,OAAO,MAAM,KAAK,KAAK,GAAG;AAChC,WAAO,IAAI,gBAAgB,MAAM,MAAM,OAAO;AAAA,EAChD,CAAC;AACH;"}
1
+ {"version":3,"file":"validator.js","sources":["../../src/query/validator.ts"],"sourcesContent":["/**\n * Query Schema Validation using Zod\n *\n * Validates query YAML structure against the query DSL schema.\n * See spec/models/query-dsl.md for the full specification.\n */\n\nimport { z } from 'zod';\nimport type { QueryAST } from './types.js';\n\n/**\n * Schema for field types.\n */\nexport const fieldTypeSchema = z.enum([\n 'title',\n 'abstract',\n 'title_abstract',\n 'author',\n 'keyword',\n 'all',\n]);\n\n/**\n * Schema for term block containing search terms.\n */\nexport const termBlockSchema = z.object({\n keywords: z.array(z.string()).min(1).optional(),\n mesh: z.array(z.string()).optional(),\n emtree: z.array(z.string()).optional(),\n eric: z.array(z.string()).optional(),\n exclude: z.array(z.string()).optional(),\n}).refine(\n (data) => data.keywords?.length || data.mesh?.length || data.emtree?.length || data.eric?.length,\n { message: 'At least one of keywords, mesh, emtree, or eric is required' }\n);\n\n/**\n * Schema for operator.\n */\nexport const operatorSchema = z.enum(['AND', 'OR']);\n\n/**\n * Schema for query block.\n */\nexport const queryBlockSchema = z.object({\n field: fieldTypeSchema,\n terms: termBlockSchema,\n operator: operatorSchema,\n});\n\n/**\n * Schema for publication type filter.\n */\nexport const publicationTypeFilterSchema = z.object({\n include: z.array(z.string()).optional(),\n exclude: z.array(z.string()).optional(),\n});\n\n/**\n * Schema for filters (YAML input format with snake_case).\n * Transforms to camelCase for internal use.\n */\nexport const filtersSchema = z\n .object({\n year_from: z.number().int().optional(),\n year_to: z.number().int().optional(),\n language: z.array(z.string()).optional(),\n publication_types: publicationTypeFilterSchema.optional(),\n })\n .optional()\n .default({})\n .transform((data) => ({\n yearFrom: data.year_from,\n yearTo: data.year_to,\n languages: data.language,\n publicationTypes: 
data.publication_types,\n }));\n\n/**\n * Schema for override block (YAML input format).\n */\nexport const overrideBlockSchema = z\n .object({\n filters: z\n .object({\n year_from: z.number().int().optional(),\n year_to: z.number().int().optional(),\n language: z.array(z.string()).optional(),\n publication_types: publicationTypeFilterSchema.optional(),\n })\n .optional(),\n categories: z.array(z.string()).optional(),\n source_types: z.array(z.string()).optional(),\n })\n .transform((data) => ({\n filters: data.filters\n ? {\n yearFrom: data.filters.year_from,\n yearTo: data.filters.year_to,\n languages: data.filters.language,\n publicationTypes: data.filters.publication_types,\n }\n : undefined,\n categories: data.categories,\n sourceTypes: data.source_types,\n }));\n\n/**\n * Provider names schema.\n */\nexport const providerNameSchema = z.enum([\n 'pubmed',\n 'scopus',\n 'eric',\n 'arxiv',\n 'wos',\n 'embase',\n]);\n\n/**\n * Schema for overrides object (partial record of provider -> override).\n */\nconst overridesSchema = z\n .object({\n pubmed: overrideBlockSchema.optional(),\n scopus: overrideBlockSchema.optional(),\n eric: overrideBlockSchema.optional(),\n arxiv: overrideBlockSchema.optional(),\n wos: overrideBlockSchema.optional(),\n embase: overrideBlockSchema.optional(),\n })\n .optional()\n .default({});\n\n/**\n * Schema for the complete query file (YAML input format).\n * Transforms to QueryAST for internal use.\n */\nexport const queryFileSchema = z\n .object({\n name: z.string().min(1),\n description: z.string().optional(),\n query: z.array(queryBlockSchema).min(1),\n filters: filtersSchema,\n overrides: overridesSchema,\n })\n .transform((data) => ({\n name: data.name,\n description: data.description,\n blocks: data.query,\n filters: data.filters,\n overrides: data.overrides,\n }));\n\n/**\n * Validate a parsed YAML object against the query schema.\n * Returns a validated QueryAST.\n */\nexport function validateQueryFile(data: unknown): QueryAST 
{\n return queryFileSchema.parse(data);\n}\n\n/**\n * Validation error with path information.\n */\nexport class ValidationError extends Error {\n constructor(\n public readonly path: string,\n message: string\n ) {\n super(message);\n this.name = 'ValidationError';\n }\n}\n\n/**\n * Format Zod validation errors into an array of ValidationError objects.\n *\n * @param data - The data to validate\n * @returns Array of ValidationError objects (empty if valid)\n */\nexport function formatValidationErrors(data: unknown): ValidationError[] {\n const result = queryFileSchema.safeParse(data);\n\n if (result.success) {\n return [];\n }\n\n return result.error.issues.map((issue) => {\n const path = issue.path.join('.');\n return new ValidationError(path, issue.message);\n });\n}\n"],"names":[],"mappings":";AAaO,MAAM,kBAAkB,EAAE,KAAK;AAAA,EACpC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAKM,MAAM,kBAAkB,EAAE,OAAO;AAAA,EACtC,UAAU,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,IAAI,CAAC,EAAE,SAAA;AAAA,EACrC,MAAM,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC1B,QAAQ,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC5B,MAAM,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC1B,SAAS,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAC/B,CAAC,EAAE;AAAA,EACD,CAAC,SAAS,KAAK,UAAU,UAAU,KAAK,MAAM,UAAU,KAAK,QAAQ,UAAU,KAAK,MAAM;AAAA,EAC1F,EAAE,SAAS,8DAAA;AACb;AAKO,MAAM,iBAAiB,EAAE,KAAK,CAAC,OAAO,IAAI,CAAC;AAK3C,MAAM,mBAAmB,EAAE,OAAO;AAAA,EACvC,OAAO;AAAA,EACP,OAAO;AAAA,EACP,UAAU;AACZ,CAAC;AAKM,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,SAAS,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC7B,SAAS,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAC/B,CAAC;AAMM,MAAM,gBAAgB,EAC1B,OAAO;AAAA,EACN,WAAW,EAAE,OAAA,EAAS,IAAA,EAAM,SAAA;AAAA,EAC5B,SAAS,EAAE,OAAA,EAAS,IAAA,EAAM,SAAA;AAAA,EAC1B,UAAU,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAC9B,mBAAmB,4BAA4B,SAAA;AACjD,CAAC,EACA,WACA,QAAQ,CAAA,CAAE,EACV,UAAU,CAAC,UAAU;AAAA,EACpB,UAAU,KAAK;AAAA,EACf,QAAQ,KAAK;AAAA,EACb,WAAW,KAAK;AAAA,EAChB,kBAAkB,KAAK;AACzB,EAAE;AAKG,MAAM,sBAAsB,EAChC,OAAO;AAAA,EACN,SAAS,EACN,OAA
O;AAAA,IACN,WAAW,EAAE,OAAA,EAAS,IAAA,EAAM,SAAA;AAAA,IAC5B,SAAS,EAAE,OAAA,EAAS,IAAA,EAAM,SAAA;AAAA,IAC1B,UAAU,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,IAC9B,mBAAmB,4BAA4B,SAAA;AAAA,EAAS,CACzD,EACA,SAAA;AAAA,EACH,YAAY,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AAAA,EAChC,cAAc,EAAE,MAAM,EAAE,OAAA,CAAQ,EAAE,SAAA;AACpC,CAAC,EACA,UAAU,CAAC,UAAU;AAAA,EACpB,SAAS,KAAK,UACV;AAAA,IACE,UAAU,KAAK,QAAQ;AAAA,IACvB,QAAQ,KAAK,QAAQ;AAAA,IACrB,WAAW,KAAK,QAAQ;AAAA,IACxB,kBAAkB,KAAK,QAAQ;AAAA,EAAA,IAEjC;AAAA,EACJ,YAAY,KAAK;AAAA,EACjB,aAAa,KAAK;AACpB,EAAE;AAK8B,EAAE,KAAK;AAAA,EACvC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAKD,MAAM,kBAAkB,EACrB,OAAO;AAAA,EACN,QAAQ,oBAAoB,SAAA;AAAA,EAC5B,QAAQ,oBAAoB,SAAA;AAAA,EAC5B,MAAM,oBAAoB,SAAA;AAAA,EAC1B,OAAO,oBAAoB,SAAA;AAAA,EAC3B,KAAK,oBAAoB,SAAA;AAAA,EACzB,QAAQ,oBAAoB,SAAA;AAC9B,CAAC,EACA,SAAA,EACA,QAAQ,EAAE;AAMN,MAAM,kBAAkB,EAC5B,OAAO;AAAA,EACN,MAAM,EAAE,SAAS,IAAI,CAAC;AAAA,EACtB,aAAa,EAAE,OAAA,EAAS,SAAA;AAAA,EACxB,OAAO,EAAE,MAAM,gBAAgB,EAAE,IAAI,CAAC;AAAA,EACtC,SAAS;AAAA,EACT,WAAW;AACb,CAAC,EACA,UAAU,CAAC,UAAU;AAAA,EACpB,MAAM,KAAK;AAAA,EACX,aAAa,KAAK;AAAA,EAClB,QAAQ,KAAK;AAAA,EACb,SAAS,KAAK;AAAA,EACd,WAAW,KAAK;AAClB,EAAE;AAMG,SAAS,kBAAkB,MAAyB;AACzD,SAAO,gBAAgB,MAAM,IAAI;AACnC;AAKO,MAAM,wBAAwB,MAAM;AAAA,EACzC,YACkB,MAChB,SACA;AACA,UAAM,OAAO;AAHG,SAAA,OAAA;AAIhB,SAAK,OAAO;AAAA,EACd;AACF;AAQO,SAAS,uBAAuB,MAAkC;AACvE,QAAM,SAAS,gBAAgB,UAAU,IAAI;AAE7C,MAAI,OAAO,SAAS;AAClB,WAAO,CAAA;AAAA,EACT;AAEA,SAAO,OAAO,MAAM,OAAO,IAAI,CAAC,UAAU;AACxC,UAAM,OAAO,MAAM,KAAK,KAAK,GAAG;AAChC,WAAO,IAAI,gBAAgB,MAAM,MAAM,OAAO;AAAA,EAChD,CAAC;AACH;"}
@@ -0,0 +1,15 @@
1
+ import { MeSHLookupResult } from './mesh-lookup.js';
2
+ export declare class VocabCache { // File-backed cache of vocabulary lookup results, keyed by vocabulary and term.
3
+ private store; // in-memory entries: cached result plus the time it was stored
4
+ private readonly cachePath; // JSON file read by load() and written by save()
5
+ private readonly ttlMs; // maximum entry age in milliseconds; get() treats older entries as missing
6
+ constructor(options?: {
7
+ cachePath?: string; // defaults to cache/vocab-lookup.json under getConfigDir()
8
+ ttlMs?: number; // defaults to 30 days
9
+ });
10
+ load(): Promise<void>; // replaces the in-memory store with the file contents; a read or parse failure, or a non-object payload, yields an empty store
11
+ save(): Promise<void>; // creates the parent directory if needed and writes the whole store as JSON
12
+ get(vocabulary: string, term: string): MeSHLookupResult | undefined; // undefined when the entry is absent or older than ttlMs (expired entries are removed from memory)
13
+ set(vocabulary: string, term: string, result: MeSHLookupResult): void; // stores in memory only, stamped with the current time; call save() to persist
14
+ }
15
+ //# sourceMappingURL=vocab-cache.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vocab-cache.d.ts","sourceRoot":"","sources":["../../src/query/vocab-cache.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AAWzD,qBAAa,UAAU;IACrB,OAAO,CAAC,KAAK,CAAuB;IACpC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;gBAEnB,OAAO,CAAC,EAAE;QAAE,SAAS,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAE;IAOtD,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAcrB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAK3B,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,gBAAgB,GAAG,SAAS;IAanE,GAAG,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI;CAItE"}
@@ -0,0 +1,44 @@
1
+ import { readFile, mkdir, writeFile } from "node:fs/promises";
2
+ import { join, dirname } from "node:path";
3
+ import { getConfigDir } from "../config/paths.js";
4
+ const DEFAULT_TTL_MS = 30 * 24 * 60 * 60 * 1e3; // 30 days expressed in milliseconds; used when no ttlMs option is given
5
+ class VocabCache { // File-backed cache of vocabulary lookup results with a per-entry time-to-live.
6
+ store = {}; // maps a vocabulary:term key to an object holding result and cachedAt
7
+ cachePath; // location of the JSON cache file
8
+ ttlMs; // maximum entry age in milliseconds
9
+ constructor(options) { // options is optional; cachePath and ttlMs each fall back to a default
10
+ this.cachePath = options?.cachePath ?? join(getConfigDir(), "cache", "vocab-lookup.json");
11
+ this.ttlMs = options?.ttlMs ?? DEFAULT_TTL_MS;
12
+ }
13
+ async load() { // Reads the cache file into memory; never rejects because every failure falls back to an empty store.
14
+ try {
15
+ const raw = await readFile(this.cachePath, "utf-8");
16
+ const parsed = JSON.parse(raw);
17
+ this.store = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed) ? parsed : {}; // only the top-level shape is checked; individual entries are not validated
18
+ } catch {
19
+ this.store = {}; // missing file, unreadable file, or invalid JSON
20
+ }
21
+ }
22
+ async save() { // Writes the whole in-memory store to cachePath; file system errors propagate to the caller.
23
+ await mkdir(dirname(this.cachePath), { recursive: true }); // ensure the parent directory exists
24
+ await writeFile(this.cachePath, JSON.stringify(this.store), "utf-8");
25
+ }
26
+ get(vocabulary, term) { // Returns the cached result, or undefined when there is no entry or the entry has expired.
27
+ const key = `${vocabulary}:${term}`;
28
+ const entry = this.store[key];
29
+ if (!entry) return void 0;
30
+ if (Date.now() - entry.cachedAt > this.ttlMs) { // NOTE(review): an entry without a numeric cachedAt makes this NaN, so the comparison is false and the entry never expires
31
+ delete this.store[key]; // dropped from memory only; the file changes on the next save()
32
+ return void 0;
33
+ }
34
+ return entry.result;
35
+ }
36
+ set(vocabulary, term, result) { // Stores result in memory under the vocabulary:term key, stamped with the current time; does not write the file.
37
+ const key = `${vocabulary}:${term}`;
38
+ this.store[key] = { result, cachedAt: Date.now() };
39
+ }
40
+ }
41
+ export {
42
+ VocabCache
43
+ };
44
+ //# sourceMappingURL=vocab-cache.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vocab-cache.js","sources":["../../src/query/vocab-cache.ts"],"sourcesContent":["/**\n * File-based cache for vocabulary lookup results.\n *\n * Stores MeSH (and future vocabulary) lookup results on disk to avoid\n * redundant API calls. TTL defaults to 30 days.\n */\nimport { readFile, writeFile, mkdir } from 'node:fs/promises';\nimport { dirname, join } from 'node:path';\nimport { getConfigDir } from '../config/paths.js';\nimport type { MeSHLookupResult } from './mesh-lookup.js';\n\nconst DEFAULT_TTL_MS = 30 * 24 * 60 * 60 * 1000; // 30 days\n\ninterface VocabCacheEntry {\n result: MeSHLookupResult;\n cachedAt: number; // Unix ms\n}\n\ntype VocabCacheStore = Record<string, VocabCacheEntry>;\n\nexport class VocabCache {\n private store: VocabCacheStore = {};\n private readonly cachePath: string;\n private readonly ttlMs: number;\n\n constructor(options?: { cachePath?: string; ttlMs?: number }) {\n this.cachePath =\n options?.cachePath ??\n join(getConfigDir(), 'cache', 'vocab-lookup.json');\n this.ttlMs = options?.ttlMs ?? DEFAULT_TTL_MS;\n }\n\n async load(): Promise<void> {\n try {\n const raw = await readFile(this.cachePath, 'utf-8');\n const parsed: unknown = JSON.parse(raw);\n this.store =\n typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)\n ? 
(parsed as VocabCacheStore)\n : {};\n } catch {\n // File missing or corrupted JSON — start with empty cache\n this.store = {};\n }\n }\n\n async save(): Promise<void> {\n await mkdir(dirname(this.cachePath), { recursive: true });\n await writeFile(this.cachePath, JSON.stringify(this.store), 'utf-8');\n }\n\n get(vocabulary: string, term: string): MeSHLookupResult | undefined {\n const key = `${vocabulary}:${term}`;\n const entry = this.store[key];\n if (!entry) return undefined;\n\n if (Date.now() - entry.cachedAt > this.ttlMs) {\n delete this.store[key];\n return undefined;\n }\n\n return entry.result;\n }\n\n set(vocabulary: string, term: string, result: MeSHLookupResult): void {\n const key = `${vocabulary}:${term}`;\n this.store[key] = { result, cachedAt: Date.now() };\n }\n}\n"],"names":[],"mappings":";;;AAWA,MAAM,iBAAiB,KAAK,KAAK,KAAK,KAAK;AASpC,MAAM,WAAW;AAAA,EACd,QAAyB,CAAA;AAAA,EAChB;AAAA,EACA;AAAA,EAEjB,YAAY,SAAkD;AAC5D,SAAK,YACH,SAAS,aACT,KAAK,aAAA,GAAgB,SAAS,mBAAmB;AACnD,SAAK,QAAQ,SAAS,SAAS;AAAA,EACjC;AAAA,EAEA,MAAM,OAAsB;AAC1B,QAAI;AACF,YAAM,MAAM,MAAM,SAAS,KAAK,WAAW,OAAO;AAClD,YAAM,SAAkB,KAAK,MAAM,GAAG;AACtC,WAAK,QACH,OAAO,WAAW,YAAY,WAAW,QAAQ,CAAC,MAAM,QAAQ,MAAM,IACjE,SACD,CAAA;AAAA,IACR,QAAQ;AAEN,WAAK,QAAQ,CAAA;AAAA,IACf;AAAA,EACF;AAAA,EAEA,MAAM,OAAsB;AAC1B,UAAM,MAAM,QAAQ,KAAK,SAAS,GAAG,EAAE,WAAW,MAAM;AACxD,UAAM,UAAU,KAAK,WAAW,KAAK,UAAU,KAAK,KAAK,GAAG,OAAO;AAAA,EACrE;AAAA,EAEA,IAAI,YAAoB,MAA4C;AAClE,UAAM,MAAM,GAAG,UAAU,IAAI,IAAI;AACjC,UAAM,QAAQ,KAAK,MAAM,GAAG;AAC5B,QAAI,CAAC,MAAO,QAAO;AAEnB,QAAI,KAAK,IAAA,IAAQ,MAAM,WAAW,KAAK,OAAO;AAC5C,aAAO,KAAK,MAAM,GAAG;AACrB,aAAO;AAAA,IACT;AAEA,WAAO,MAAM;AAAA,EACf;AAAA,EAEA,IAAI,YAAoB,MAAc,QAAgC;AACpE,UAAM,MAAM,GAAG,UAAU,IAAI,IAAI;AACjC,SAAK,MAAM,GAAG,IAAI,EAAE,QAAQ,UAAU,KAAK,MAAI;AAAA,EACjD;AACF;"}
@@ -0,0 +1,71 @@
1
+ import { QueryAST } from './types.js';
2
+ import { MeSHLookupClient } from './mesh-lookup.js';
3
+ import { Provider } from '../providers/base/types.js';
4
+ import { VocabCache } from './vocab-cache.js';
5
+ /** Supported controlled vocabulary types. */
6
+ export type VocabType = 'mesh' | 'eric' | 'emtree'; // each value is also the key of the term-block field the terms are read from
7
+ /**
8
+ * A controlled vocabulary term extracted from a QueryAST.
9
+ */
10
+ export interface VocabTerm {
11
+ term: string; // the term exactly as written in the query block
12
+ vocabulary: VocabType; // which vocabulary field of the block the term came from
13
+ }
14
+ /**
15
+ * Result of validating a single controlled vocabulary term.
16
+ */
17
+ export interface VocabTermResult {
18
+ term: string; // the term that was checked
19
+ vocabulary: VocabType; // the vocabulary it was checked against
20
+ found: boolean; // mesh: the found flag of the lookup result; count-based vocabularies: true when the hit count is above zero
21
+ suggestions?: string[]; // only set for mesh terms whose lookup returned suggestions; count-based results never carry it
22
+ }
23
+ /**
24
+ * A controlled vocabulary term that failed due to an API error.
25
+ */
26
+ export interface VocabTermError {
27
+ term: string; // the term whose lookup threw
28
+ vocabulary: VocabType; // the vocabulary being checked when it threw
29
+ error: string; // the message of the thrown Error, or String(value) for a non-Error throw
30
+ }
31
+ /**
32
+ * Result of validating all controlled vocabulary terms in a query.
33
+ */
34
+ export interface VocabValidationResult {
35
+ valid: VocabTermResult[]; // results with found set to true
36
+ invalid: VocabTermResult[]; // results with found set to false
37
+ errors: VocabTermError[]; // terms whose lookup threw, so validity is unknown
38
+ }
39
+ /**
40
+ * Extract all controlled vocabulary terms from a QueryAST.
41
+ * Terms are deduplicated across blocks.
42
+ */
43
+ export declare function extractControlledVocabTerms(ast: QueryAST): VocabTerm[]; // order follows first appearance across ast.blocks; duplicates are matched by exact string within one vocabulary
44
+ /**
45
+ * A count-based vocabulary validator.
46
+ * Validates terms by executing a count-only search — hit count 0 means invalid.
47
+ */
48
+ export interface CountVocabValidator {
49
+ vocabulary: VocabType; // the vocabulary this validator handles; validateControlledVocab keeps one validator per vocabulary
50
+ countTerm: (term: string) => Promise<number>; // resolves to the hit count for the term; a rejection is reported as an error for that term
51
+ }
52
+ /**
53
+ * Validate all controlled vocabulary terms in a QueryAST.
54
+ *
55
+ * MeSH terms are validated via the MeSH lookup API (exact match + suggestions).
56
+ * ERIC/Emtree terms are validated via count-only search when countValidators are provided.
57
+ * Terms whose vocabulary has no validator are skipped silently.
58
+ *
59
+ * Different vocabulary groups are validated in parallel; within each group,
60
+ * concurrency is limited to 3 requests at a time.
61
+ */
62
+ export declare function validateControlledVocab(ast: QueryAST, meshClient: MeSHLookupClient, options?: {
63
+ countValidators?: CountVocabValidator[]; // when two validators share a vocabulary, the later one in the array is used
64
+ }): Promise<VocabValidationResult>; // per-term lookup failures are collected in errors rather than rejecting the promise
65
+ export declare function createEricCountValidator(provider: Provider, options?: { // NOTE(review): implementation is not visible in this excerpt; presumably counts hits for an ERIC subject query on provider - confirm
66
+ cache?: VocabCache; // presumably used to reuse earlier counts instead of querying again - confirm against the implementation
67
+ }): CountVocabValidator;
68
+ export declare function createEmtreeCountValidator(provider: Provider, options?: { // NOTE(review): implementation is not visible in this excerpt; presumably counts hits for an INDEXTERMS query on provider - confirm
69
+ cache?: VocabCache; // presumably used to reuse earlier counts instead of querying again - confirm against the implementation
70
+ }): CountVocabValidator;
71
+ //# sourceMappingURL=vocab-validator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vocab-validator.d.ts","sourceRoot":"","sources":["../../src/query/vocab-validator.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAC3C,OAAO,KAAK,EAAE,gBAAgB,EAAoB,MAAM,kBAAkB,CAAC;AAC3E,OAAO,KAAK,EAAE,QAAQ,EAAiC,MAAM,4BAA4B,CAAC;AAC1F,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAEnD,6CAA6C;AAC7C,MAAM,MAAM,SAAS,GAAG,MAAM,GAAG,MAAM,GAAG,QAAQ,CAAC;AAEnD;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,SAAS,CAAC;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,SAAS,CAAC;IACtB,KAAK,EAAE,OAAO,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,SAAS,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,eAAe,EAAE,CAAC;IACzB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,MAAM,EAAE,cAAc,EAAE,CAAC;CAC1B;AAED;;;GAGG;AACH,wBAAgB,2BAA2B,CAAC,GAAG,EAAE,QAAQ,GAAG,SAAS,EAAE,CA0BtE;AAED;;;GAGG;AACH,MAAM,WAAW,mBAAmB;IAClC,UAAU,EAAE,SAAS,CAAC;IACtB,SAAS,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;CAC9C;AAsBD;;;;;;;;;GASG;AACH,wBAAsB,uBAAuB,CAC3C,GAAG,EAAE,QAAQ,EACb,UAAU,EAAE,gBAAgB,EAC5B,OAAO,CAAC,EAAE;IAAE,eAAe,CAAC,EAAE,mBAAmB,EAAE,CAAA;CAAE,GACpD,OAAO,CAAC,qBAAqB,CAAC,CA2FhC;AAgDD,wBAAgB,wBAAwB,CACtC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,UAAU,CAAA;CAAE,GAC/B,mBAAmB,CAErB;AAED,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE;IAAE,KAAK,CAAC,EAAE,UAAU,CAAA;CAAE,GAC/B,mBAAmB,CAErB"}
@@ -0,0 +1,153 @@
1
+ function extractControlledVocabTerms(ast) { // Collects the mesh, eric, and emtree terms of every block in ast.blocks, without duplicates, in first-seen order.
2
+ const seen = /* @__PURE__ */ new Set(); // vocabulary:term keys already emitted
3
+ const terms = [];
4
+ const vocabFields = [ // key is the property read from block.terms; vocab is the label written to the output
5
+ { key: "mesh", vocab: "mesh" },
6
+ { key: "eric", vocab: "eric" },
7
+ { key: "emtree", vocab: "emtree" }
8
+ ];
9
+ for (const block of ast.blocks) {
10
+ for (const { key, vocab } of vocabFields) {
11
+ const fieldTerms = block.terms[key];
12
+ if (fieldTerms) { // the field is optional on a term block
13
+ for (const term of fieldTerms) {
14
+ const dedupeKey = `${vocab}:${term}`; // exact string match, so the same text in two vocabularies is kept twice
15
+ if (!seen.has(dedupeKey)) {
16
+ seen.add(dedupeKey);
17
+ terms.push({ term, vocabulary: vocab });
18
+ }
19
+ }
20
+ }
21
+ }
22
+ }
23
+ return terms;
24
+ }
25
+ async function mapWithConcurrency(items, concurrency, fn) { // Applies the async fn to each item with at most concurrency calls in flight; each result is stored at the index of its input.
26
+ const results = [];
27
+ let index = 0; // next unclaimed position, shared by all workers
28
+ async function worker() { // pulls items one at a time until none are left
29
+ while (index < items.length) {
30
+ const i = index++; // check and claim happen with no await in between, so no index is taken twice
31
+ const item = items[i];
32
+ if (item !== void 0) results[i] = await fn(item); // undefined items are skipped and leave a hole in results
33
+ }
34
+ }
35
+ await Promise.all( // a rejection from fn rejects the whole call
36
+ Array.from({ length: Math.min(concurrency, items.length) }, () => worker()) // NOTE(review): a concurrency of 0 or less starts no workers, so a non-empty input returns an empty array without calling fn
37
+ );
38
+ return results;
39
+ }
40
+ async function validateControlledVocab(ast, meshClient, options) { // Checks every controlled vocabulary term of ast and resolves to an object with valid, invalid, and errors lists.
41
+ const terms = extractControlledVocabTerms(ast);
42
+ const countValidatorMap = /* @__PURE__ */ new Map(); // vocabulary to count validator
43
+ for (const cv of options?.countValidators ?? []) {
44
+ countValidatorMap.set(cv.vocabulary, cv); // a later validator for the same vocabulary replaces an earlier one
45
+ }
46
+ const meshTerms = terms.filter((t) => t.vocabulary === "mesh");
47
+ const countGroups = /* @__PURE__ */ new Map(); // vocabulary to the terms that will be checked by hit count
48
+ for (const t of terms) {
49
+ if (t.vocabulary === "mesh") continue; // mesh terms go through meshClient instead
50
+ const validator = countValidatorMap.get(t.vocabulary);
51
+ if (!validator) continue; // no validator for this vocabulary: the term is skipped and appears in no output list
52
+ const group = countGroups.get(t.vocabulary) ?? [];
53
+ group.push(t);
54
+ countGroups.set(t.vocabulary, group);
55
+ }
56
+ const CONCURRENCY = 3; // limit applied separately inside each vocabulary group
57
+ const meshTask = mapWithConcurrency(meshTerms, CONCURRENCY, async (vocabTerm) => { // started here but awaited later, so it runs alongside the count groups
58
+ let result;
59
+ try {
60
+ result = await meshClient.lookupTerm(vocabTerm.term);
61
+ } catch (err) { // a failed lookup becomes an error outcome instead of rejecting the whole validation
62
+ return {
63
+ kind: "error",
64
+ error: {
65
+ term: vocabTerm.term,
66
+ vocabulary: vocabTerm.vocabulary,
67
+ error: err instanceof Error ? err.message : String(err)
68
+ }
69
+ };
70
+ }
71
+ const termResult = {
72
+ term: vocabTerm.term,
73
+ vocabulary: vocabTerm.vocabulary,
74
+ found: result.found,
75
+ ...result.suggestions ? { suggestions: result.suggestions } : {} // the suggestions key is omitted entirely when the lookup returned none
76
+ };
77
+ return { kind: result.found ? "valid" : "invalid", result: termResult };
78
+ });
79
+ const countTasks = [...countGroups.entries()].map(([vocabType, groupTerms]) => { // one task per vocabulary group
80
+ const validator = countValidatorMap.get(vocabType);
81
+ if (!validator) return Promise.resolve([]); // defensive: groups are only created for vocabularies that have a validator
82
+ return mapWithConcurrency(groupTerms, CONCURRENCY, async (vocabTerm) => {
83
+ let count;
84
+ try {
85
+ count = await validator.countTerm(vocabTerm.term);
86
+ } catch (err) { // same error handling as the mesh branch
87
+ return {
88
+ kind: "error",
89
+ error: {
90
+ term: vocabTerm.term,
91
+ vocabulary: vocabTerm.vocabulary,
92
+ error: err instanceof Error ? err.message : String(err)
93
+ }
94
+ };
95
+ }
96
+ const termResult = {
97
+ term: vocabTerm.term,
98
+ vocabulary: vocabTerm.vocabulary,
99
+ found: count > 0 // a term counts as found when the search reports at least one hit
100
+ };
101
+ return { kind: count > 0 ? "valid" : "invalid", result: termResult };
102
+ });
103
+ });
104
+ const allOutcomes = (await Promise.all([meshTask, ...countTasks])).flat(); // wait for every group, then merge into one list of outcomes
105
+ const valid = [];
106
+ const invalid = [];
107
+ const errors = [];
108
+ for (const outcome of allOutcomes) { // sort outcomes into the three result lists by kind
109
+ if (outcome.kind === "valid") valid.push(outcome.result);
110
+ else if (outcome.kind === "invalid") invalid.push(outcome.result);
111
+ else errors.push(outcome.error);
112
+ }
113
+ return { valid, invalid, errors };
114
+ }
115
/**
 * Build the native query used to count hits for a single ERIC descriptor.
 * Double quotes are removed from the term before it is wrapped in
 * `subject:"…"`, so the term cannot terminate the quoted phrase early.
 *
 * @param term - Descriptor text.
 * @returns Query string of the form `subject:"<term without quotes>"`.
 */
function buildEricCountQuery(term) {
  const unquoted = term.replace(/"/g, "");
  return 'subject:"' + unquoted + '"';
}
118
/**
 * Build the native query used to count hits for a single Emtree term.
 * Double quotes are removed from the term before it is wrapped in
 * `INDEXTERMS("…")`, so the term cannot terminate the quoted phrase early.
 *
 * @param term - Emtree term text.
 * @returns Query string of the form `INDEXTERMS("<term without quotes>")`.
 */
function buildEmtreeCountQuery(term) {
  const unquoted = term.replace(/"/g, "");
  return 'INDEXTERMS("' + unquoted + '")';
}
121
/**
 * Create a count-based validator for one vocabulary.
 *
 * The returned `countTerm(term)`:
 *  1. answers from `options.cache` when an entry exists, returning 1 for a
 *     cached "found" and 0 otherwise (the cache only stores the found flag,
 *     not the real hit count);
 *  2. otherwise asks `provider.count` with `{ native, provider }`, where
 *     `native` comes from `buildQuery(term)`;
 *  3. stores `{ term, found: count > 0 }` in the cache when one is configured;
 *  4. returns the provider's count.
 *
 * @param vocabulary - Vocabulary identifier exposed on the validator and used
 *   as the cache namespace.
 * @param provider - Object exposing an async `count(query)`.
 * @param buildQuery - Turns a term into the provider's native query string.
 * @param providerName - Value placed in the query's `provider` field.
 * @param options - Optional `{ cache }` with `get(vocabulary, term)` and
 *   `set(vocabulary, term, entry)`.
 * @returns `{ vocabulary, countTerm }`.
 */
function createCountValidator(vocabulary, provider, buildQuery, providerName, options) {
  const countTerm = async (term) => {
    // Cache lookup first; any cached entry short-circuits the provider call.
    const remembered = options?.cache ? options.cache.get(vocabulary, term) : void 0;
    if (remembered) {
      return remembered.found ? 1 : 0;
    }

    const translated = { native: buildQuery(term), provider: providerName };
    const hits = await provider.count(translated);

    if (options?.cache) {
      options.cache.set(vocabulary, term, { term, found: hits > 0 });
    }
    return hits;
  };
  return { vocabulary, countTerm };
}
141
/**
 * Create the count-based validator for ERIC descriptors.
 * Queries are built with `buildEricCountQuery` and tagged with the "eric"
 * provider name.
 *
 * @param provider - Provider forwarded to `createCountValidator`.
 * @param options - Optional settings forwarded unchanged (e.g. `{ cache }`).
 * @returns The validator produced by `createCountValidator`.
 */
function createEricCountValidator(provider, options) {
  const vocabulary = "eric";
  const providerName = "eric";
  return createCountValidator(vocabulary, provider, buildEricCountQuery, providerName, options);
}
144
/**
 * Create the count-based validator for Emtree terms.
 * Queries are built with `buildEmtreeCountQuery` and tagged with the "scopus"
 * provider name.
 *
 * @param provider - Provider forwarded to `createCountValidator`.
 * @param options - Optional settings forwarded unchanged (e.g. `{ cache }`).
 * @returns The validator produced by `createCountValidator`.
 */
function createEmtreeCountValidator(provider, options) {
  const vocabulary = "emtree";
  const providerName = "scopus";
  return createCountValidator(vocabulary, provider, buildEmtreeCountQuery, providerName, options);
}
147
+ export {
148
+ createEmtreeCountValidator,
149
+ createEricCountValidator,
150
+ extractControlledVocabTerms,
151
+ validateControlledVocab
152
+ };
153
+ //# sourceMappingURL=vocab-validator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vocab-validator.js","sources":["../../src/query/vocab-validator.ts"],"sourcesContent":["/**\n * Controlled vocabulary validator.\n *\n * Extracts controlled vocabulary terms (MeSH, etc.) from a QueryAST\n * and validates them against external APIs.\n */\nimport type { QueryAST } from './types.js';\nimport type { MeSHLookupClient, MeSHLookupResult } from './mesh-lookup.js';\nimport type { Provider, ProviderName, TranslatedQuery } from '../providers/base/types.js';\nimport type { VocabCache } from './vocab-cache.js';\n\n/** Supported controlled vocabulary types. */\nexport type VocabType = 'mesh' | 'eric' | 'emtree';\n\n/**\n * A controlled vocabulary term extracted from a QueryAST.\n */\nexport interface VocabTerm {\n term: string;\n vocabulary: VocabType;\n}\n\n/**\n * Result of validating a single controlled vocabulary term.\n */\nexport interface VocabTermResult {\n term: string;\n vocabulary: VocabType;\n found: boolean;\n suggestions?: string[];\n}\n\n/**\n * A controlled vocabulary term that failed due to an API error.\n */\nexport interface VocabTermError {\n term: string;\n vocabulary: VocabType;\n error: string;\n}\n\n/**\n * Result of validating all controlled vocabulary terms in a query.\n */\nexport interface VocabValidationResult {\n valid: VocabTermResult[];\n invalid: VocabTermResult[];\n errors: VocabTermError[];\n}\n\n/**\n * Extract all controlled vocabulary terms from a QueryAST.\n * Terms are deduplicated across blocks.\n */\nexport function extractControlledVocabTerms(ast: QueryAST): VocabTerm[] {\n const seen = new Set<string>();\n const terms: VocabTerm[] = [];\n\n const vocabFields: { key: keyof typeof ast.blocks[0]['terms']; vocab: VocabType }[] = [\n { key: 'mesh', vocab: 'mesh' },\n { key: 'eric', vocab: 'eric' },\n { key: 'emtree', vocab: 'emtree' },\n ];\n\n for (const block of ast.blocks) {\n for (const { key, vocab } of vocabFields) {\n const fieldTerms = block.terms[key];\n if (fieldTerms) {\n for (const term of 
fieldTerms) {\n const dedupeKey = `${vocab}:${term}`;\n if (!seen.has(dedupeKey)) {\n seen.add(dedupeKey);\n terms.push({ term, vocabulary: vocab });\n }\n }\n }\n }\n }\n\n return terms;\n}\n\n/**\n * A count-based vocabulary validator.\n * Validates terms by executing a count-only search — hit count 0 means invalid.\n */\nexport interface CountVocabValidator {\n vocabulary: VocabType;\n countTerm: (term: string) => Promise<number>;\n}\n\nasync function mapWithConcurrency<T, R>(\n items: T[],\n concurrency: number,\n fn: (item: T) => Promise<R>\n): Promise<R[]> {\n const results: R[] = [];\n let index = 0;\n async function worker() {\n while (index < items.length) {\n const i = index++;\n const item = items[i];\n if (item !== undefined) results[i] = await fn(item);\n }\n }\n await Promise.all(\n Array.from({ length: Math.min(concurrency, items.length) }, () => worker())\n );\n return results;\n}\n\n/**\n * Validate all controlled vocabulary terms in a QueryAST.\n *\n * MeSH terms are validated via the MeSH lookup API (exact match + suggestions).\n * ERIC/Emtree terms are validated via count-only search when countValidators are provided.\n * Terms whose vocabulary has no validator are skipped silently.\n *\n * Different vocabulary groups are validated in parallel; within each group,\n * concurrency is limited to 3 requests at a time.\n */\nexport async function validateControlledVocab(\n ast: QueryAST,\n meshClient: MeSHLookupClient,\n options?: { countValidators?: CountVocabValidator[] }\n): Promise<VocabValidationResult> {\n const terms = extractControlledVocabTerms(ast);\n\n const countValidatorMap = new Map<VocabType, CountVocabValidator>();\n for (const cv of options?.countValidators ?? 
[]) {\n countValidatorMap.set(cv.vocabulary, cv);\n }\n\n // Group terms by vocabulary\n const meshTerms = terms.filter((t) => t.vocabulary === 'mesh');\n const countGroups = new Map<VocabType, VocabTerm[]>();\n for (const t of terms) {\n if (t.vocabulary === 'mesh') continue;\n const validator = countValidatorMap.get(t.vocabulary);\n if (!validator) continue;\n const group = countGroups.get(t.vocabulary) ?? [];\n group.push(t);\n countGroups.set(t.vocabulary, group);\n }\n\n type TermOutcome =\n | { kind: 'valid'; result: VocabTermResult }\n | { kind: 'invalid'; result: VocabTermResult }\n | { kind: 'error'; error: VocabTermError };\n\n const CONCURRENCY = 3;\n\n // Validate MeSH terms\n const meshTask = mapWithConcurrency(meshTerms, CONCURRENCY, async (vocabTerm): Promise<TermOutcome> => {\n let result: MeSHLookupResult;\n try {\n result = await meshClient.lookupTerm(vocabTerm.term);\n } catch (err) {\n return {\n kind: 'error',\n error: {\n term: vocabTerm.term,\n vocabulary: vocabTerm.vocabulary,\n error: err instanceof Error ? err.message : String(err),\n },\n };\n }\n const termResult: VocabTermResult = {\n term: vocabTerm.term,\n vocabulary: vocabTerm.vocabulary,\n found: result.found,\n ...(result.suggestions ? { suggestions: result.suggestions } : {}),\n };\n return { kind: result.found ? 'valid' : 'invalid', result: termResult };\n });\n\n // Validate count-based vocab groups in parallel\n const countTasks = [...countGroups.entries()].map(([vocabType, groupTerms]) => {\n const validator = countValidatorMap.get(vocabType);\n if (!validator) return Promise.resolve([]);\n return mapWithConcurrency(groupTerms, CONCURRENCY, async (vocabTerm): Promise<TermOutcome> => {\n let count: number;\n try {\n count = await validator.countTerm(vocabTerm.term);\n } catch (err) {\n return {\n kind: 'error',\n error: {\n term: vocabTerm.term,\n vocabulary: vocabTerm.vocabulary,\n error: err instanceof Error ? 
err.message : String(err),\n },\n };\n }\n const termResult: VocabTermResult = {\n term: vocabTerm.term,\n vocabulary: vocabTerm.vocabulary,\n found: count > 0,\n };\n return { kind: count > 0 ? 'valid' : 'invalid', result: termResult };\n });\n });\n\n const allOutcomes = (await Promise.all([meshTask, ...countTasks])).flat();\n\n const valid: VocabTermResult[] = [];\n const invalid: VocabTermResult[] = [];\n const errors: VocabTermError[] = [];\n\n for (const outcome of allOutcomes) {\n if (outcome.kind === 'valid') valid.push(outcome.result);\n else if (outcome.kind === 'invalid') invalid.push(outcome.result);\n else errors.push(outcome.error);\n }\n\n return { valid, invalid, errors };\n}\n\n/**\n * Build a native count-only query for a single ERIC descriptor.\n * Uses subject: field with quoted term.\n */\nfunction buildEricCountQuery(term: string): string {\n return `subject:\"${term.replace(/\"/g, '')}\"`;\n}\n\n/**\n * Build a native count-only query for a single Emtree term.\n * Uses INDEXTERMS() function with quoted term.\n */\nfunction buildEmtreeCountQuery(term: string): string {\n return `INDEXTERMS(\"${term.replace(/\"/g, '')}\")`;\n}\n\nfunction createCountValidator(\n vocabulary: VocabType,\n provider: Provider,\n buildQuery: (term: string) => string,\n providerName: ProviderName,\n options?: { cache?: VocabCache }\n): CountVocabValidator {\n return {\n vocabulary,\n countTerm: async (term: string): Promise<number> => {\n if (options?.cache) {\n const cached = options.cache.get(vocabulary, term);\n if (cached) return cached.found ? 
1 : 0;\n }\n\n const query: TranslatedQuery = {\n native: buildQuery(term),\n provider: providerName,\n };\n const count = await provider.count(query);\n\n if (options?.cache) {\n options.cache.set(vocabulary, term, { term, found: count > 0 });\n }\n\n return count;\n },\n };\n}\n\nexport function createEricCountValidator(\n provider: Provider,\n options?: { cache?: VocabCache }\n): CountVocabValidator {\n return createCountValidator('eric', provider, buildEricCountQuery, 'eric', options);\n}\n\nexport function createEmtreeCountValidator(\n provider: Provider,\n options?: { cache?: VocabCache }\n): CountVocabValidator {\n return createCountValidator('emtree', provider, buildEmtreeCountQuery, 'scopus', options);\n}\n"],"names":[],"mappings":"AAsDO,SAAS,4BAA4B,KAA4B;AACtE,QAAM,2BAAW,IAAA;AACjB,QAAM,QAAqB,CAAA;AAE3B,QAAM,cAAgF;AAAA,IACpF,EAAE,KAAK,QAAQ,OAAO,OAAA;AAAA,IACtB,EAAE,KAAK,QAAQ,OAAO,OAAA;AAAA,IACtB,EAAE,KAAK,UAAU,OAAO,SAAA;AAAA,EAAS;AAGnC,aAAW,SAAS,IAAI,QAAQ;AAC9B,eAAW,EAAE,KAAK,MAAA,KAAW,aAAa;AACxC,YAAM,aAAa,MAAM,MAAM,GAAG;AAClC,UAAI,YAAY;AACd,mBAAW,QAAQ,YAAY;AAC7B,gBAAM,YAAY,GAAG,KAAK,IAAI,IAAI;AAClC,cAAI,CAAC,KAAK,IAAI,SAAS,GAAG;AACxB,iBAAK,IAAI,SAAS;AAClB,kBAAM,KAAK,EAAE,MAAM,YAAY,OAAO;AAAA,UACxC;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;AAWA,eAAe,mBACb,OACA,aACA,IACc;AACd,QAAM,UAAe,CAAA;AACrB,MAAI,QAAQ;AACZ,iBAAe,SAAS;AACtB,WAAO,QAAQ,MAAM,QAAQ;AAC3B,YAAM,IAAI;AACV,YAAM,OAAO,MAAM,CAAC;AACpB,UAAI,SAAS,OAAW,SAAQ,CAAC,IAAI,MAAM,GAAG,IAAI;AAAA,IACpD;AAAA,EACF;AACA,QAAM,QAAQ;AAAA,IACZ,MAAM,KAAK,EAAE,QAAQ,KAAK,IAAI,aAAa,MAAM,MAAM,EAAA,GAAK,MAAM,QAAQ;AAAA,EAAA;AAE5E,SAAO;AACT;AAYA,eAAsB,wBACpB,KACA,YACA,SACgC;AAChC,QAAM,QAAQ,4BAA4B,GAAG;AAE7C,QAAM,wCAAwB,IAAA;AAC9B,aAAW,MAAM,SAAS,mBAAmB,CAAA,GAAI;AAC/C,sBAAkB,IAAI,GAAG,YAAY,EAAE;AAAA,EACzC;AAGA,QAAM,YAAY,MAAM,OAAO,CAAC,MAAM,EAAE,eAAe,MAAM;AAC7D,QAAM,kCAAkB,IAAA;AACxB,aAAW,KAAK,OAAO;AACrB,QAAI,EAAE,eAAe,OAAQ;AAC7B,UAAM,YAAY,kBAAkB,IAAI,EAAE,UAAU;AACpD,QAAI,CAAC,UAAW;AAChB,UAAM,QAAQ,YAAY,IAAI,EAAE,UAA
U,KAAK,CAAA;AAC/C,UAAM,KAAK,CAAC;AACZ,gBAAY,IAAI,EAAE,YAAY,KAAK;AAAA,EACrC;AAOA,QAAM,cAAc;AAGpB,QAAM,WAAW,mBAAmB,WAAW,aAAa,OAAO,cAAoC;AACrG,QAAI;AACJ,QAAI;AACF,eAAS,MAAM,WAAW,WAAW,UAAU,IAAI;AAAA,IACrD,SAAS,KAAK;AACZ,aAAO;AAAA,QACL,MAAM;AAAA,QACN,OAAO;AAAA,UACL,MAAM,UAAU;AAAA,UAChB,YAAY,UAAU;AAAA,UACtB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,QAAA;AAAA,MACxD;AAAA,IAEJ;AACA,UAAM,aAA8B;AAAA,MAClC,MAAM,UAAU;AAAA,MAChB,YAAY,UAAU;AAAA,MACtB,OAAO,OAAO;AAAA,MACd,GAAI,OAAO,cAAc,EAAE,aAAa,OAAO,YAAA,IAAgB,CAAA;AAAA,IAAC;AAElE,WAAO,EAAE,MAAM,OAAO,QAAQ,UAAU,WAAW,QAAQ,WAAA;AAAA,EAC7D,CAAC;AAGD,QAAM,aAAa,CAAC,GAAG,YAAY,QAAA,CAAS,EAAE,IAAI,CAAC,CAAC,WAAW,UAAU,MAAM;AAC7E,UAAM,YAAY,kBAAkB,IAAI,SAAS;AACjD,QAAI,CAAC,UAAW,QAAO,QAAQ,QAAQ,CAAA,CAAE;AACzC,WAAO,mBAAmB,YAAY,aAAa,OAAO,cAAoC;AAC5F,UAAI;AACJ,UAAI;AACF,gBAAQ,MAAM,UAAU,UAAU,UAAU,IAAI;AAAA,MAClD,SAAS,KAAK;AACZ,eAAO;AAAA,UACL,MAAM;AAAA,UACN,OAAO;AAAA,YACL,MAAM,UAAU;AAAA,YAChB,YAAY,UAAU;AAAA,YACtB,OAAO,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAAA,UAAA;AAAA,QACxD;AAAA,MAEJ;AACA,YAAM,aAA8B;AAAA,QAClC,MAAM,UAAU;AAAA,QAChB,YAAY,UAAU;AAAA,QACtB,OAAO,QAAQ;AAAA,MAAA;AAEjB,aAAO,EAAE,MAAM,QAAQ,IAAI,UAAU,WAAW,QAAQ,WAAA;AAAA,IAC1D,CAAC;AAAA,EACH,CAAC;AAED,QAAM,eAAe,MAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC,GAAG,KAAA;AAEnE,QAAM,QAA2B,CAAA;AACjC,QAAM,UAA6B,CAAA;AACnC,QAAM,SAA2B,CAAA;AAEjC,aAAW,WAAW,aAAa;AACjC,QAAI,QAAQ,SAAS,QAAS,OAAM,KAAK,QAAQ,MAAM;AAAA,aAC9C,QAAQ,SAAS,UAAW,SAAQ,KAAK,QAAQ,MAAM;AAAA,QAC3D,QAAO,KAAK,QAAQ,KAAK;AAAA,EAChC;AAEA,SAAO,EAAE,OAAO,SAAS,OAAA;AAC3B;AAMA,SAAS,oBAAoB,MAAsB;AACjD,SAAO,YAAY,KAAK,QAAQ,MAAM,EAAE,CAAC;AAC3C;AAMA,SAAS,sBAAsB,MAAsB;AACnD,SAAO,eAAe,KAAK,QAAQ,MAAM,EAAE,CAAC;AAC9C;AAEA,SAAS,qBACP,YACA,UACA,YACA,cACA,SACqB;AACrB,SAAO;AAAA,IACL;AAAA,IACA,WAAW,OAAO,SAAkC;AAClD,UAAI,SAAS,OAAO;AAClB,cAAM,SAAS,QAAQ,MAAM,IAAI,YAAY,IAAI;AACjD,YAAI,OAAQ,QAAO,OAAO,QAAQ,IAAI;AAAA,MACxC;AAEA,YAAM,QAAyB;AAAA,QAC7B,QAAQ,WAAW,IAAI;AAAA,QACvB,UAAU;AAAA,MAAA;AAEZ,YAAM,QAAQ,MAAM,SAAS,MAAM,KAAK;AAExC,UAAI,SAAS,OAAO;AAClB,gBAAQ,MAAM,IAAI,YAAY,MAAM,EAA
E,MAAM,OAAO,QAAQ,GAAG;AAAA,MAChE;AAEA,aAAO;AAAA,IACT;AAAA,EAAA;AAEJ;AAEO,SAAS,yBACd,UACA,SACqB;AACrB,SAAO,qBAAqB,QAAQ,UAAU,qBAAqB,QAAQ,OAAO;AACpF;AAEO,SAAS,2BACd,UACA,SACqB;AACrB,SAAO,qBAAqB,UAAU,UAAU,uBAAuB,UAAU,OAAO;AAC1F;"}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Compute the Levenshtein (edit) distance between two strings
3
+ * using the Wagner-Fischer dynamic programming algorithm.
4
+ */
5
+ export declare function levenshteinDistance(a: string, b: string): number;
6
+ //# sourceMappingURL=levenshtein.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"levenshtein.d.ts","sourceRoot":"","sources":["../../src/utils/levenshtein.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAgBhE"}
@@ -0,0 +1,21 @@
1
/**
 * Compute the Levenshtein (edit) distance between two strings with the
 * Wagner-Fischer dynamic programming table.
 *
 * `table[r][c]` holds the distance between the first `r` characters of `a`
 * and the first `c` characters of `b`. Characters are compared by string
 * index (UTF-16 code units).
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns Minimum number of single-character insertions, deletions and
 *   substitutions turning `a` into `b`.
 */
function levenshteinDistance(a, b) {
  const rowCount = a.length + 1;
  const colCount = b.length + 1;

  // Build the table with the base cases filled in: turning a prefix into the
  // empty string (first column) or the empty string into a prefix (first row).
  const table = [];
  for (let r = 0; r < rowCount; r++) {
    const row = new Array(colCount).fill(0);
    row[0] = r;
    table.push(row);
  }
  for (let c = 0; c < colCount; c++) {
    table[0][c] = c;
  }

  for (let r = 1; r < rowCount; r++) {
    const above = table[r - 1];
    const current = table[r];
    for (let c = 1; c < colCount; c++) {
      const deletion = above[c] + 1;
      const insertion = current[c - 1] + 1;
      const substitution = above[c - 1] + (a[r - 1] === b[c - 1] ? 0 : 1);
      current[c] = Math.min(deletion, insertion, substitution);
    }
  }
  return table[rowCount - 1][colCount - 1];
}
18
+ export {
19
+ levenshteinDistance
20
+ };
21
+ //# sourceMappingURL=levenshtein.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"levenshtein.js","sources":["../../src/utils/levenshtein.ts"],"sourcesContent":["/**\n * Compute the Levenshtein (edit) distance between two strings\n * using the Wagner-Fischer dynamic programming algorithm.\n */\nexport function levenshteinDistance(a: string, b: string): number {\n const m = a.length, n = b.length;\n const dp: number[][] = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0));\n for (let i = 0; i <= m; i++) dp[i]![0] = i;\n for (let j = 0; j <= n; j++) dp[0]![j] = j;\n for (let i = 1; i <= m; i++) {\n for (let j = 1; j <= n; j++) {\n const cost = a[i - 1] === b[j - 1] ? 0 : 1;\n dp[i]![j] = Math.min(\n dp[i - 1]![j]! + 1,\n dp[i]![j - 1]! + 1,\n dp[i - 1]![j - 1]! + cost\n );\n }\n }\n return dp[m]![n]!;\n}\n"],"names":[],"mappings":"AAIO,SAAS,oBAAoB,GAAW,GAAmB;AAChE,QAAM,IAAI,EAAE,QAAQ,IAAI,EAAE;AAC1B,QAAM,KAAiB,MAAM,KAAK,EAAE,QAAQ,IAAI,EAAA,GAAK,MAAM,MAAM,IAAI,CAAC,EAAE,KAAK,CAAC,CAAC;AAC/E,WAAS,IAAI,GAAG,KAAK,GAAG,IAAK,IAAG,CAAC,EAAG,CAAC,IAAI;AACzC,WAAS,IAAI,GAAG,KAAK,GAAG,IAAK,IAAG,CAAC,EAAG,CAAC,IAAI;AACzC,WAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,aAAS,IAAI,GAAG,KAAK,GAAG,KAAK;AAC3B,YAAM,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,IAAI;AACzC,SAAG,CAAC,EAAG,CAAC,IAAI,KAAK;AAAA,QACf,GAAG,IAAI,CAAC,EAAG,CAAC,IAAK;AAAA,QACjB,GAAG,CAAC,EAAG,IAAI,CAAC,IAAK;AAAA,QACjB,GAAG,IAAI,CAAC,EAAG,IAAI,CAAC,IAAK;AAAA,MAAA;AAAA,IAEzB;AAAA,EACF;AACA,SAAO,GAAG,CAAC,EAAG,CAAC;AACjB;"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ncukondo/search-hub",
3
- "version": "0.12.2",
3
+ "version": "0.13.0",
4
4
  "description": "A CLI tool for systematic literature searching across multiple academic databases",
5
5
  "type": "module",
6
6
  "engines": {