pubid 2.0.0.pre.alpha.2 → 2.0.0.pre.alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (458) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +5 -1
  3. data/data/nist/update_codes.yaml +25 -0
  4. data/lib/pubid/amca/builder.rb +2 -2
  5. data/lib/pubid/amca/identifier.rb +7 -39
  6. data/lib/pubid/amca/identifiers/base.rb +0 -26
  7. data/lib/pubid/amca/identifiers/interpretation.rb +0 -17
  8. data/lib/pubid/amca/identifiers/publication.rb +0 -13
  9. data/lib/pubid/amca/renderer.rb +82 -0
  10. data/lib/pubid/amca/single_identifier.rb +0 -23
  11. data/lib/pubid/amca/urn_parser.rb +28 -0
  12. data/lib/pubid/amca.rb +42 -1
  13. data/lib/pubid/ansi/builder.rb +5 -3
  14. data/lib/pubid/ansi/identifier.rb +1 -43
  15. data/lib/pubid/ansi/identifiers/american_national_standard.rb +2 -1
  16. data/lib/pubid/ansi/identifiers/standard.rb +2 -3
  17. data/lib/pubid/ansi/renderer.rb +53 -0
  18. data/lib/pubid/ansi/single_identifier.rb +2 -31
  19. data/lib/pubid/ansi/urn_generator.rb +3 -38
  20. data/lib/pubid/ansi/urn_parser.rb +23 -0
  21. data/lib/pubid/ansi.rb +38 -3
  22. data/lib/pubid/api/builder.rb +29 -74
  23. data/lib/pubid/api/identifier.rb +0 -51
  24. data/lib/pubid/api/identifiers/base.rb +0 -2
  25. data/lib/pubid/api/identifiers/bulletin.rb +0 -2
  26. data/lib/pubid/api/identifiers/continuous_operations_standard.rb +0 -2
  27. data/lib/pubid/api/identifiers/mpms.rb +1 -17
  28. data/lib/pubid/api/identifiers/publication.rb +0 -2
  29. data/lib/pubid/api/identifiers/recommended_practice.rb +0 -2
  30. data/lib/pubid/api/identifiers/specification.rb +0 -2
  31. data/lib/pubid/api/identifiers/standard.rb +0 -2
  32. data/lib/pubid/api/identifiers/technical_report.rb +0 -2
  33. data/lib/pubid/api/identifiers/typeless_standard.rb +1 -14
  34. data/lib/pubid/api/identifiers.rb +18 -0
  35. data/lib/pubid/api/renderer.rb +89 -0
  36. data/lib/pubid/api/single_identifier.rb +1 -13
  37. data/lib/pubid/api/urn_generator.rb +0 -18
  38. data/lib/pubid/api/urn_parser.rb +35 -0
  39. data/lib/pubid/api.rb +51 -5
  40. data/lib/pubid/ashrae/builder.rb +3 -3
  41. data/lib/pubid/ashrae/identifier.rb +6 -39
  42. data/lib/pubid/ashrae/identifiers/addenda_package.rb +0 -10
  43. data/lib/pubid/ashrae/identifiers/addendum.rb +0 -19
  44. data/lib/pubid/ashrae/identifiers/base.rb +3 -0
  45. data/lib/pubid/ashrae/identifiers/combined_addenda.rb +0 -15
  46. data/lib/pubid/ashrae/identifiers/errata.rb +0 -10
  47. data/lib/pubid/ashrae/identifiers/interpretation.rb +0 -10
  48. data/lib/pubid/ashrae/renderer.rb +117 -0
  49. data/lib/pubid/ashrae/single_identifier.rb +0 -13
  50. data/lib/pubid/ashrae/urn_generator.rb +0 -8
  51. data/lib/pubid/ashrae/urn_parser.rb +27 -0
  52. data/lib/pubid/ashrae.rb +42 -1
  53. data/lib/pubid/asme/components/code.rb +10 -2
  54. data/lib/pubid/asme/identifier.rb +0 -46
  55. data/lib/pubid/asme/identifiers/base.rb +0 -60
  56. data/lib/pubid/asme/renderer.rb +66 -0
  57. data/lib/pubid/asme/urn_parser.rb +31 -0
  58. data/lib/pubid/asme.rb +42 -1
  59. data/lib/pubid/astm/components/code.rb +9 -0
  60. data/lib/pubid/{jis → astm}/components.rb +1 -1
  61. data/lib/pubid/astm/identifier.rb +0 -77
  62. data/lib/pubid/astm/identifiers/adjunct.rb +0 -8
  63. data/lib/pubid/astm/identifiers/data_series.rb +0 -14
  64. data/lib/pubid/astm/identifiers/iso_dual_published.rb +9 -34
  65. data/lib/pubid/astm/identifiers/manual.rb +0 -27
  66. data/lib/pubid/astm/identifiers/monograph.rb +0 -14
  67. data/lib/pubid/astm/identifiers/research_report.rb +0 -7
  68. data/lib/pubid/astm/identifiers/standard.rb +0 -39
  69. data/lib/pubid/astm/identifiers/technical_report.rb +0 -13
  70. data/lib/pubid/astm/identifiers/work_in_progress.rb +0 -11
  71. data/lib/pubid/astm/identifiers.rb +18 -0
  72. data/lib/pubid/astm/renderer.rb +172 -0
  73. data/lib/pubid/astm/single_identifier.rb +0 -10
  74. data/lib/pubid/astm/urn_parser.rb +30 -0
  75. data/lib/pubid/astm.rb +39 -27
  76. data/lib/pubid/bsi/builder.rb +21 -12
  77. data/lib/pubid/bsi/identifier.rb +8 -62
  78. data/lib/pubid/bsi/identifiers/addendum_document.rb +3 -33
  79. data/lib/pubid/bsi/identifiers/adopted_european_norm.rb +11 -47
  80. data/lib/pubid/bsi/identifiers/adopted_international_standard.rb +11 -38
  81. data/lib/pubid/bsi/identifiers/aerospace_standard.rb +3 -53
  82. data/lib/pubid/bsi/identifiers/amendment.rb +3 -19
  83. data/lib/pubid/bsi/identifiers/british_industrial_practice.rb +2 -4
  84. data/lib/pubid/bsi/identifiers/british_standard.rb +2 -1
  85. data/lib/pubid/bsi/identifiers/bundled_identifier.rb +3 -84
  86. data/lib/pubid/bsi/identifiers/committee_document.rb +1 -14
  87. data/lib/pubid/bsi/identifiers/consolidated_identifier.rb +3 -84
  88. data/lib/pubid/bsi/identifiers/corrigendum.rb +3 -7
  89. data/lib/pubid/bsi/identifiers/detailed_specification.rb +1 -34
  90. data/lib/pubid/bsi/identifiers/disc.rb +1 -27
  91. data/lib/pubid/bsi/identifiers/draft_document.rb +3 -44
  92. data/lib/pubid/bsi/identifiers/electronic_book.rb +3 -36
  93. data/lib/pubid/bsi/identifiers/expert_commentary.rb +3 -15
  94. data/lib/pubid/bsi/identifiers/explanatory_supplement.rb +1 -45
  95. data/lib/pubid/bsi/identifiers/flex.rb +1 -33
  96. data/lib/pubid/bsi/identifiers/handbook.rb +2 -13
  97. data/lib/pubid/bsi/identifiers/index.rb +1 -30
  98. data/lib/pubid/bsi/identifiers/method.rb +1 -39
  99. data/lib/pubid/bsi/identifiers/national_annex.rb +5 -27
  100. data/lib/pubid/bsi/identifiers/practice_guide.rb +2 -4
  101. data/lib/pubid/bsi/identifiers/publicly_available_specification.rb +3 -52
  102. data/lib/pubid/bsi/identifiers/published_document.rb +3 -52
  103. data/lib/pubid/bsi/identifiers/section.rb +1 -28
  104. data/lib/pubid/bsi/identifiers/set.rb +3 -17
  105. data/lib/pubid/bsi/identifiers/standalone_amendment.rb +1 -7
  106. data/lib/pubid/bsi/identifiers/supplement_document.rb +3 -21
  107. data/lib/pubid/bsi/identifiers/supplementary_index.rb +1 -44
  108. data/lib/pubid/bsi/identifiers/technical_specification.rb +3 -45
  109. data/lib/pubid/bsi/identifiers/test_method.rb +1 -30
  110. data/lib/pubid/bsi/identifiers/value_added_publication.rb +3 -14
  111. data/lib/pubid/bsi/identifiers.rb +0 -1
  112. data/lib/pubid/bsi/renderer.rb +1050 -0
  113. data/lib/pubid/bsi/single_identifier.rb +6 -70
  114. data/lib/pubid/bsi/urn_generator.rb +2 -3
  115. data/lib/pubid/bsi/urn_parser.rb +52 -0
  116. data/lib/pubid/bsi.rb +224 -1
  117. data/lib/pubid/builder/base.rb +57 -10
  118. data/lib/pubid/bundled_identifier.rb +0 -1
  119. data/lib/pubid/ccsds/builder.rb +4 -3
  120. data/lib/pubid/ccsds/identifier.rb +63 -66
  121. data/lib/pubid/ccsds/identifiers/base.rb +11 -61
  122. data/lib/pubid/ccsds/identifiers/corrigendum.rb +7 -6
  123. data/lib/pubid/ccsds/parser.rb +4 -2
  124. data/lib/pubid/ccsds/supplement_identifier.rb +15 -11
  125. data/lib/pubid/ccsds/urn_generator.rb +3 -3
  126. data/lib/pubid/ccsds/urn_parser.rb +20 -0
  127. data/lib/pubid/ccsds.rb +39 -1
  128. data/lib/pubid/cen_cenelec/builder.rb +12 -14
  129. data/lib/pubid/cen_cenelec/identifier.rb +7 -38
  130. data/lib/pubid/cen_cenelec/identifiers/adopted_european_norm.rb +13 -4
  131. data/lib/pubid/cen_cenelec/identifiers/amendment.rb +2 -8
  132. data/lib/pubid/cen_cenelec/identifiers/base.rb +5 -41
  133. data/lib/pubid/cen_cenelec/identifiers/cen_report.rb +2 -1
  134. data/lib/pubid/cen_cenelec/identifiers/cen_workshop_agreement.rb +2 -1
  135. data/lib/pubid/cen_cenelec/identifiers/consolidated_identifier.rb +2 -25
  136. data/lib/pubid/cen_cenelec/identifiers/corrigendum.rb +2 -13
  137. data/lib/pubid/cen_cenelec/identifiers/european_norm.rb +2 -1
  138. data/lib/pubid/cen_cenelec/identifiers/european_prestandard.rb +4 -7
  139. data/lib/pubid/cen_cenelec/identifiers/european_specification.rb +2 -1
  140. data/lib/pubid/cen_cenelec/identifiers/fragment.rb +2 -2
  141. data/lib/pubid/cen_cenelec/identifiers/harmonization_document.rb +2 -1
  142. data/lib/pubid/cen_cenelec/identifiers/technical_report.rb +2 -1
  143. data/lib/pubid/cen_cenelec/identifiers/technical_specification.rb +2 -1
  144. data/lib/pubid/cen_cenelec/renderer.rb +261 -0
  145. data/lib/pubid/cen_cenelec/single_identifier.rb +11 -89
  146. data/lib/pubid/cen_cenelec/urn_generator.rb +6 -6
  147. data/lib/pubid/cen_cenelec/urn_parser.rb +28 -0
  148. data/lib/pubid/cen_cenelec.rb +168 -1
  149. data/lib/pubid/cie/components/code.rb +8 -0
  150. data/lib/pubid/cie/identifier.rb +6 -57
  151. data/lib/pubid/cie/urn_parser.rb +28 -0
  152. data/lib/pubid/cie.rb +43 -1
  153. data/lib/pubid/components/adoption.rb +104 -0
  154. data/lib/pubid/components/code.rb +22 -8
  155. data/lib/pubid/components/date.rb +23 -16
  156. data/lib/pubid/components/edition.rb +9 -6
  157. data/lib/pubid/components/iteration.rb +32 -0
  158. data/lib/pubid/components/language.rb +6 -4
  159. data/lib/pubid/components/locality.rb +10 -1
  160. data/lib/pubid/components/publisher.rb +9 -6
  161. data/lib/pubid/components/relationship.rb +151 -0
  162. data/lib/pubid/components/stage.rb +5 -14
  163. data/lib/pubid/components/supplement.rb +184 -0
  164. data/lib/pubid/components/type.rb +5 -15
  165. data/lib/pubid/components/typed_stage.rb +10 -11
  166. data/lib/pubid/components.rb +4 -1
  167. data/lib/pubid/core/update_codes.rb +28 -7
  168. data/lib/pubid/csa/identifier.rb +0 -59
  169. data/lib/pubid/csa/identifiers/base.rb +2 -122
  170. data/lib/pubid/csa/identifiers/cec.rb +2 -101
  171. data/lib/pubid/csa/identifiers/series.rb +2 -102
  172. data/lib/pubid/csa/renderer.rb +292 -0
  173. data/lib/pubid/csa/urn_generator.rb +1 -1
  174. data/lib/pubid/csa/urn_parser.rb +33 -0
  175. data/lib/pubid/csa.rb +42 -1
  176. data/lib/pubid/etsi/components/code.rb +9 -2
  177. data/lib/pubid/etsi/identifier.rb +0 -43
  178. data/lib/pubid/etsi/identifiers/base.rb +1 -4
  179. data/lib/pubid/etsi/identifiers/supplement_identifier.rb +2 -9
  180. data/lib/pubid/etsi/renderer.rb +42 -0
  181. data/lib/pubid/etsi/urn_parser.rb +34 -0
  182. data/lib/pubid/etsi.rb +42 -1
  183. data/lib/pubid/export/exporter.rb +4 -46
  184. data/lib/pubid/export/flavor_exporter.rb +111 -278
  185. data/lib/pubid/export.rb +0 -6
  186. data/lib/pubid/identifier.rb +2 -17
  187. data/lib/pubid/identifier_facade.rb +114 -0
  188. data/lib/pubid/identifier_metadata.rb +1 -1
  189. data/lib/pubid/idf/builder.rb +3 -3
  190. data/lib/pubid/idf/identifier.rb +3 -66
  191. data/lib/pubid/idf/identifiers/amendment.rb +2 -1
  192. data/lib/pubid/idf/identifiers/corrigendum.rb +2 -1
  193. data/lib/pubid/idf/identifiers/international_standard.rb +2 -1
  194. data/lib/pubid/idf/identifiers/reviewed_method.rb +2 -1
  195. data/lib/pubid/idf/parser.rb +3 -2
  196. data/lib/pubid/idf/renderer.rb +84 -0
  197. data/lib/pubid/idf/supplement_identifier.rb +2 -10
  198. data/lib/pubid/idf/urn_generator.rb +4 -39
  199. data/lib/pubid/idf/urn_parser.rb +25 -0
  200. data/lib/pubid/idf.rb +51 -1
  201. data/lib/pubid/iec/builder.rb +46 -64
  202. data/lib/pubid/iec/components/code.rb +8 -32
  203. data/lib/pubid/iec/components/publisher.rb +0 -1
  204. data/lib/pubid/iec/components.rb +14 -0
  205. data/lib/pubid/iec/identifier.rb +251 -213
  206. data/lib/pubid/iec/identifiers/amendment.rb +2 -3
  207. data/lib/pubid/iec/identifiers/base.rb +8 -32
  208. data/lib/pubid/iec/identifiers/component_specification.rb +3 -3
  209. data/lib/pubid/iec/identifiers/conformity_assessment.rb +1 -2
  210. data/lib/pubid/iec/identifiers/consolidated_identifier.rb +27 -26
  211. data/lib/pubid/iec/identifiers/corrigendum.rb +2 -3
  212. data/lib/pubid/iec/identifiers/fragment_identifier.rb +37 -22
  213. data/lib/pubid/iec/identifiers/guide.rb +0 -2
  214. data/lib/pubid/iec/identifiers/international_standard.rb +2 -3
  215. data/lib/pubid/iec/identifiers/interpretation_sheet.rb +2 -3
  216. data/lib/pubid/iec/identifiers/operational_document.rb +3 -3
  217. data/lib/pubid/iec/identifiers/publicly_available_specification.rb +2 -3
  218. data/lib/pubid/iec/identifiers/sheet_identifier.rb +21 -11
  219. data/lib/pubid/iec/identifiers/societal_technology_trend_report.rb +3 -3
  220. data/lib/pubid/iec/identifiers/systems_reference_document.rb +2 -3
  221. data/lib/pubid/iec/identifiers/technical_report.rb +2 -3
  222. data/lib/pubid/iec/identifiers/technical_specification.rb +2 -3
  223. data/lib/pubid/iec/identifiers/technology_report.rb +1 -2
  224. data/lib/pubid/iec/identifiers/test_report_form.rb +5 -34
  225. data/lib/pubid/iec/identifiers/vap_identifier.rb +26 -19
  226. data/lib/pubid/iec/identifiers/white_paper.rb +3 -3
  227. data/lib/pubid/iec/identifiers/working_document.rb +4 -48
  228. data/lib/pubid/iec/identifiers.rb +30 -0
  229. data/lib/pubid/iec/parser.rb +13 -12
  230. data/lib/pubid/iec/renderer.rb +254 -0
  231. data/lib/pubid/iec/single_identifier.rb +6 -12
  232. data/lib/pubid/iec/supplement_identifier.rb +58 -54
  233. data/lib/pubid/iec/urn_generator.rb +3 -3
  234. data/lib/pubid/iec/urn_parser.rb +3 -3
  235. data/lib/pubid/iec.rb +40 -68
  236. data/lib/pubid/ieee/builder.rb +12 -12
  237. data/lib/pubid/ieee/components/code.rb +8 -0
  238. data/lib/pubid/ieee/components/draft.rb +14 -0
  239. data/lib/pubid/ieee/components/relationship.rb +5 -149
  240. data/lib/pubid/ieee/identifier.rb +6 -41
  241. data/lib/pubid/ieee/identifiers/adopted_standard.rb +1 -6
  242. data/lib/pubid/ieee/identifiers/base.rb +101 -458
  243. data/lib/pubid/ieee/identifiers/conformance_identifier.rb +1 -7
  244. data/lib/pubid/ieee/identifiers/corrigendum.rb +1 -9
  245. data/lib/pubid/ieee/identifiers/csa_dual_published.rb +1 -7
  246. data/lib/pubid/ieee/identifiers/dual_identifier.rb +1 -1
  247. data/lib/pubid/ieee/identifiers/dual_published.rb +1 -1
  248. data/lib/pubid/ieee/identifiers/iec_ieee_copublished.rb +1 -6
  249. data/lib/pubid/ieee/identifiers/interpretation_identifier.rb +1 -7
  250. data/lib/pubid/ieee/identifiers/joint_development.rb +2 -0
  251. data/lib/pubid/ieee/identifiers/multi_numbered_identifier.rb +1 -15
  252. data/lib/pubid/ieee/identifiers/parenthetical_identifier.rb +1 -3
  253. data/lib/pubid/ieee/identifiers/project_draft_identifier.rb +15 -0
  254. data/lib/pubid/ieee/identifiers/redlined_standard.rb +1 -4
  255. data/lib/pubid/ieee/identifiers/si_standard.rb +1 -35
  256. data/lib/pubid/ieee/identifiers/standard.rb +1 -1
  257. data/lib/pubid/ieee/pre_parser.rb +301 -0
  258. data/lib/pubid/ieee/renderer.rb +307 -0
  259. data/lib/pubid/ieee/urn_parser.rb +34 -0
  260. data/lib/pubid/ieee.rb +62 -1
  261. data/lib/pubid/ieee_debug.rb +0 -1
  262. data/lib/pubid/iho/builder.rb +2 -2
  263. data/lib/pubid/iho/identifier.rb +8 -42
  264. data/lib/pubid/iho/identifiers/base.rb +49 -10
  265. data/lib/pubid/iho/parser.rb +3 -3
  266. data/lib/pubid/iho/renderer.rb +30 -0
  267. data/lib/pubid/iho/urn_generator.rb +2 -2
  268. data/lib/pubid/iho/urn_parser.rb +58 -0
  269. data/lib/pubid/iho.rb +50 -1
  270. data/lib/pubid/iso/builder.rb +55 -53
  271. data/lib/pubid/iso/bundled_identifier.rb +51 -0
  272. data/lib/pubid/iso/components/code.rb +7 -19
  273. data/lib/pubid/iso/components/publisher.rb +10 -8
  274. data/lib/pubid/iso/components.rb +2 -4
  275. data/lib/pubid/iso/identifier.rb +218 -252
  276. data/lib/pubid/iso/identifiers/addendum.rb +9 -6
  277. data/lib/pubid/iso/identifiers/amendment.rb +8 -4
  278. data/lib/pubid/iso/identifiers/corrigendum.rb +4 -4
  279. data/lib/pubid/iso/identifiers/data.rb +0 -1
  280. data/lib/pubid/iso/identifiers/directives.rb +8 -2
  281. data/lib/pubid/iso/identifiers/directives_supplement.rb +43 -14
  282. data/lib/pubid/iso/identifiers/extract.rb +2 -2
  283. data/lib/pubid/iso/identifiers/guide.rb +0 -1
  284. data/lib/pubid/iso/identifiers/international_standard.rb +4 -4
  285. data/lib/pubid/iso/identifiers/international_standardized_profile.rb +4 -4
  286. data/lib/pubid/iso/identifiers/international_workshop_agreement.rb +10 -4
  287. data/lib/pubid/iso/identifiers/pas.rb +2 -2
  288. data/lib/pubid/iso/identifiers/recommendation.rb +2 -2
  289. data/lib/pubid/iso/identifiers/supplement.rb +11 -3
  290. data/lib/pubid/iso/identifiers/tc_document.rb +44 -15
  291. data/lib/pubid/iso/identifiers/technical_report.rb +4 -4
  292. data/lib/pubid/iso/identifiers/technical_specification.rb +2 -2
  293. data/lib/pubid/iso/identifiers/technology_trends_assessments.rb +2 -2
  294. data/lib/pubid/iso/identifiers.rb +0 -1
  295. data/lib/pubid/iso/normalizer.rb +89 -0
  296. data/lib/pubid/iso/parser.rb +22 -4
  297. data/lib/pubid/iso/supplement_identifier.rb +15 -2
  298. data/lib/pubid/iso/urn_generator.rb +66 -182
  299. data/lib/pubid/iso/urn_parser.rb +12 -7
  300. data/lib/pubid/iso.rb +173 -2
  301. data/lib/pubid/itu/builder.rb +0 -12
  302. data/lib/pubid/itu/components/code.rb +8 -0
  303. data/lib/pubid/itu/components.rb +11 -0
  304. data/lib/pubid/itu/identifier.rb +6 -104
  305. data/lib/pubid/itu/identifiers/amendment.rb +0 -2
  306. data/lib/pubid/itu/identifiers/annex.rb +0 -2
  307. data/lib/pubid/itu/identifiers/base.rb +0 -6
  308. data/lib/pubid/itu/identifiers/combined_identifier.rb +0 -2
  309. data/lib/pubid/itu/identifiers/corrigendum.rb +0 -2
  310. data/lib/pubid/itu/identifiers/recommendation.rb +0 -2
  311. data/lib/pubid/itu/identifiers/special_publication.rb +0 -2
  312. data/lib/pubid/itu/identifiers/supplement.rb +0 -2
  313. data/lib/pubid/itu/urn_parser.rb +23 -0
  314. data/lib/pubid/itu.rb +42 -1
  315. data/lib/pubid/jcgm/builder.rb +16 -8
  316. data/lib/pubid/jcgm/identifier.rb +0 -43
  317. data/lib/pubid/jcgm/identifiers/amendment.rb +2 -7
  318. data/lib/pubid/jcgm/identifiers/gum_guide.rb +2 -10
  319. data/lib/pubid/jcgm/renderer.rb +68 -0
  320. data/lib/pubid/jcgm/single_identifier.rb +1 -5
  321. data/lib/pubid/jcgm/urn_generator.rb +4 -6
  322. data/lib/pubid/jcgm/urn_parser.rb +23 -0
  323. data/lib/pubid/jcgm.rb +43 -2
  324. data/lib/pubid/jis/builder.rb +44 -52
  325. data/lib/pubid/jis/identifier.rb +132 -46
  326. data/lib/pubid/jis/identifiers/amendment.rb +1 -1
  327. data/lib/pubid/jis/identifiers/corrigendum.rb +16 -0
  328. data/lib/pubid/jis/identifiers/standard.rb +2 -1
  329. data/lib/pubid/jis/identifiers/technical_report.rb +2 -1
  330. data/lib/pubid/jis/identifiers/technical_specification.rb +2 -1
  331. data/lib/pubid/jis/identifiers.rb +1 -1
  332. data/lib/pubid/jis/parser.rb +31 -5
  333. data/lib/pubid/jis/renderer.rb +69 -0
  334. data/lib/pubid/jis/single_identifier.rb +6 -12
  335. data/lib/pubid/jis/supplement_identifier.rb +17 -14
  336. data/lib/pubid/jis/urn_parser.rb +23 -0
  337. data/lib/pubid/jis.rb +42 -2
  338. data/lib/pubid/nist/builder.rb +63 -1871
  339. data/lib/pubid/nist/caster.rb +1272 -0
  340. data/lib/pubid/nist/circular_supplement_builder.rb +291 -0
  341. data/lib/pubid/nist/components/code.rb +9 -20
  342. data/lib/pubid/nist/components/supplement.rb +2 -2
  343. data/lib/pubid/nist/components.rb +0 -1
  344. data/lib/pubid/nist/identifier.rb +11 -48
  345. data/lib/pubid/nist/identifiers/base.rb +110 -47
  346. data/lib/pubid/nist/identifiers/circular.rb +7 -2
  347. data/lib/pubid/nist/identifiers/circular_supplement.rb +2 -1
  348. data/lib/pubid/nist/identifiers/commercial_standard.rb +2 -1
  349. data/lib/pubid/nist/identifiers/commercial_standard_emergency.rb +6 -4
  350. data/lib/pubid/nist/identifiers/commercial_standards_monthly.rb +10 -3
  351. data/lib/pubid/nist/identifiers/crpl_report.rb +8 -8
  352. data/lib/pubid/nist/identifiers/dated_document.rb +49 -0
  353. data/lib/pubid/nist/identifiers/federal_information_processing_standards.rb +15 -24
  354. data/lib/pubid/nist/identifiers/grant_contractor_report.rb +2 -1
  355. data/lib/pubid/nist/identifiers/handbook.rb +2 -1
  356. data/lib/pubid/nist/identifiers/internal_report.rb +2 -1
  357. data/lib/pubid/nist/identifiers/letter_circular.rb +2 -1
  358. data/lib/pubid/nist/identifiers/miscellaneous_publication.rb +5 -4
  359. data/lib/pubid/nist/identifiers/monograph.rb +7 -3
  360. data/lib/pubid/nist/identifiers/report.rb +4 -2
  361. data/lib/pubid/nist/identifiers/special_publication.rb +2 -1
  362. data/lib/pubid/nist/identifiers/technical_note.rb +3 -2
  363. data/lib/pubid/nist/identifiers.rb +1 -0
  364. data/lib/pubid/nist/parser.rb +62 -452
  365. data/lib/pubid/nist/parser_output_normalizer.rb +233 -0
  366. data/lib/pubid/nist/preprocessor.rb +416 -0
  367. data/lib/pubid/nist/renderer.rb +43 -0
  368. data/lib/pubid/nist/router.rb +148 -0
  369. data/lib/pubid/nist/series/base.rb +58 -0
  370. data/lib/pubid/nist/series/crpl.rb +13 -0
  371. data/lib/pubid/nist/series/fips.rb +14 -0
  372. data/lib/pubid/nist/series/ir.rb +60 -0
  373. data/lib/pubid/nist/series/letter_preserving.rb +15 -0
  374. data/lib/pubid/nist/series/mono.rb +19 -0
  375. data/lib/pubid/nist/series/ncstar.rb +20 -0
  376. data/lib/pubid/nist/series.rb +49 -0
  377. data/lib/pubid/nist/supplement_identifier.rb +3 -1
  378. data/lib/pubid/nist/urn_parser.rb +67 -0
  379. data/lib/pubid/nist.rb +82 -4
  380. data/lib/pubid/oiml/components/code.rb +10 -0
  381. data/lib/pubid/oiml/identifier.rb +0 -50
  382. data/lib/pubid/oiml/identifiers/annex.rb +3 -45
  383. data/lib/pubid/oiml/identifiers/base.rb +2 -17
  384. data/lib/pubid/oiml/renderer.rb +161 -0
  385. data/lib/pubid/oiml/single_identifier.rb +6 -45
  386. data/lib/pubid/oiml/supplement_identifier.rb +4 -19
  387. data/lib/pubid/oiml/urn_generator.rb +0 -8
  388. data/lib/pubid/oiml/urn_parser.rb +22 -0
  389. data/lib/pubid/oiml.rb +42 -1
  390. data/lib/pubid/plateau/identifier.rb +7 -41
  391. data/lib/pubid/plateau/identifiers/handbook.rb +1 -3
  392. data/lib/pubid/plateau/identifiers/technical_report.rb +1 -1
  393. data/lib/pubid/plateau/renderer.rb +51 -0
  394. data/lib/pubid/plateau/supplement_identifier.rb +1 -1
  395. data/lib/pubid/plateau/urn_parser.rb +43 -0
  396. data/lib/pubid/plateau.rb +43 -1
  397. data/lib/pubid/renderers/directives_renderer.rb +22 -8
  398. data/lib/pubid/renderers/guide_renderer.rb +4 -2
  399. data/lib/pubid/renderers/human_readable.rb +18 -7
  400. data/lib/pubid/rendering/context.rb +28 -19
  401. data/lib/pubid/rendering.rb +0 -3
  402. data/lib/pubid/sae/components/date.rb +8 -0
  403. data/lib/pubid/sae/components/type.rb +5 -1
  404. data/lib/pubid/sae/identifier.rb +0 -23
  405. data/lib/pubid/sae/identifiers/base.rb +2 -16
  406. data/lib/pubid/sae/renderer.rb +36 -0
  407. data/lib/pubid/sae/urn_generator.rb +2 -10
  408. data/lib/pubid/sae/urn_parser.rb +36 -0
  409. data/lib/pubid/sae.rb +42 -1
  410. data/lib/pubid/urn_generator/base.rb +12 -12
  411. data/lib/pubid/urn_parser/base.rb +81 -0
  412. data/lib/pubid/urn_parser/errors.rb +9 -0
  413. data/lib/pubid/urn_parser.rb +14 -0
  414. data/lib/pubid/version.rb +1 -1
  415. data/lib/pubid.rb +29 -7
  416. data/lib/tasks/website-data.json +1940 -1882
  417. metadata +75 -44
  418. data/lib/pubid/amca/scheme.rb +0 -16
  419. data/lib/pubid/ansi/scheme.rb +0 -15
  420. data/lib/pubid/api/scheme.rb +0 -66
  421. data/lib/pubid/ashrae/scheme.rb +0 -53
  422. data/lib/pubid/asme/scheme.rb +0 -37
  423. data/lib/pubid/astm/scheme.rb +0 -55
  424. data/lib/pubid/bsi/identifiers/base.rb +0 -11
  425. data/lib/pubid/bsi/scheme.rb +0 -243
  426. data/lib/pubid/ccsds/scheme.rb +0 -57
  427. data/lib/pubid/cen_cenelec/scheme.rb +0 -164
  428. data/lib/pubid/cie/scheme.rb +0 -64
  429. data/lib/pubid/components/factory.rb +0 -50
  430. data/lib/pubid/csa/scheme.rb +0 -44
  431. data/lib/pubid/etsi/scheme.rb +0 -42
  432. data/lib/pubid/export/data_class_exporter.rb +0 -59
  433. data/lib/pubid/export/ieee_exporter.rb +0 -78
  434. data/lib/pubid/export/itu_exporter.rb +0 -66
  435. data/lib/pubid/export/nist_exporter.rb +0 -64
  436. data/lib/pubid/export/registry_exporter.rb +0 -90
  437. data/lib/pubid/export/scheme_exporter.rb +0 -70
  438. data/lib/pubid/identifier_registry.rb +0 -198
  439. data/lib/pubid/idf/scheme.rb +0 -61
  440. data/lib/pubid/iec/scheme.rb +0 -71
  441. data/lib/pubid/ieee/scheme.rb +0 -90
  442. data/lib/pubid/iho/scheme.rb +0 -29
  443. data/lib/pubid/iso/identifiers/base.rb +0 -115
  444. data/lib/pubid/iso/scheme.rb +0 -193
  445. data/lib/pubid/itu/scheme.rb +0 -174
  446. data/lib/pubid/jcgm/scheme.rb +0 -60
  447. data/lib/pubid/jis/components/code.rb +0 -59
  448. data/lib/pubid/jis/identifiers/base.rb +0 -72
  449. data/lib/pubid/jis/scheme.rb +0 -49
  450. data/lib/pubid/nist/components/publisher.rb +0 -24
  451. data/lib/pubid/nist/scheme.rb +0 -199
  452. data/lib/pubid/oiml/scheme.rb +0 -46
  453. data/lib/pubid/plateau/scheme.rb +0 -45
  454. data/lib/pubid/rendering/base.rb +0 -73
  455. data/lib/pubid/rendering/common.rb +0 -211
  456. data/lib/pubid/rendering/format.rb +0 -25
  457. data/lib/pubid/sae/scheme.rb +0 -47
  458. data/lib/pubid/scheme.rb +0 -219
@@ -0,0 +1,233 @@
1
+ # frozen_string_literal: true
2
+
3
module Pubid
  module Nist
    # Normalizes the raw hash produced by the NIST parser before the Builder
    # constructs the identifier object.
    #
    # The parser emits a flat hash with keys like :first_number, :second_number,
    # :edition_dash_year, :update_prefix, etc. Several of those keys arrive in a
    # shape that does not match their meaning -- e.g. a value captured as
    # :edition_dash_year that is really a second number, or an edition suffix
    # ("44e2") embedded in :first_number.
    #
    # Each private step below performs one such shape correction, mutating the
    # hash in place. The class only touches the hash it is given plus the
    # Components::Code / Components::Edition value classes, so it can be
    # exercised in isolation.
    #
    # Pre-processing blocks that need to surface extracted components to the
    # Builder (e.g. letter-suffix Part components) remain in Builder#build
    # according to the original design notes; all other normalizations live
    # here.
    class ParserOutputNormalizer
      # Range of numbers treated as "looks like a calendar year" when
      # disambiguating :edition_dash_year from other numeric suffixes.
      # NOTE(review): the upper bound is a fixed literal; dash years above it
      # are not recognized as years until this constant is bumped.
      VALID_YEAR_RANGE = (1901..2026).freeze

      # Series that treat :edition_dash_year as a year-only edition when the
      # dash year falls in VALID_YEAR_RANGE.
      DASH_YEAR_AS_EDITION_SERIES = %w[HB CS FIPS].freeze

      # "<digits><letter><digits>", e.g. "44e2" or "8115r1": a base number with
      # an embedded edition type letter and edition id. Anchored with \A/\z so
      # the whole value must match, not just one line of it.
      EMBEDDED_EDITION_PATTERN = /\A(\d+)([a-zA-Z])(\d+)\z/.freeze

      # Exactly four digits (candidate calendar year).
      FOUR_DIGITS_PATTERN = /\A\d{4}\z/.freeze

      # Exactly two digits (short IR report prefix such as "84").
      TWO_DIGITS_PATTERN = /\A\d{2}\z/.freeze

      # Apply all normalizations to the parsed hash, in a fixed order.
      #
      # The order matters: the split_* steps may create :second_number and
      # :edition_dash_year entries that the disambiguate_* steps then consume.
      #
      # @param parsed_hash [Hash] parser output (mutated in place)
      # @return [Hash] the same hash, normalized
      def normalize(parsed_hash)
        merge_edition_e_into_update(parsed_hash)
        extract_embedded_edition_with_year(parsed_hash)
        extract_embedded_edition_without_dash_year(parsed_hash)
        split_second_number_edition_year(parsed_hash)
        split_fips_month_year_after_part(parsed_hash)
        disambiguate_ir_compound_vs_edition(parsed_hash)
        disambiguate_dash_year(parsed_hash)
        parsed_hash
      end

      private

      # Pattern: "800-53r4/Upd3-2015"
      # The parser captures "-2015" as :edition_e but it belongs on :update.
      # Runs only when :update_prefix, :update and :edition_e are all present;
      # copies the edition id into :update as :update_year and removes
      # :edition_e.
      def merge_edition_e_into_update(parsed_hash)
        return unless parsed_hash[:update_prefix] && parsed_hash[:update] && parsed_hash[:edition_e]

        update_year = parsed_hash[:edition_e][:edition_id]
        parsed_hash[:update] = parsed_hash[:update].merge(update_year: update_year)
        parsed_hash.delete(:edition_e)
      end

      # Pattern: "44e2-1955"
      #   first_number="44e2", edition_dash_year={dash_year: "1955"}
      # Result: first_number=Code("44"),
      #   edition_with_year=Edition(type: "e", id: "2", additional_text: "1955"),
      #   and :edition_dash_year removed.
      def extract_embedded_edition_with_year(parsed_hash)
        dash_year_entry = parsed_hash[:edition_dash_year]
        return unless dash_year_entry
        return unless extract_embedded_edition(parsed_hash, additional_text: dash_year_entry[:dash_year])

        parsed_hash.delete(:edition_dash_year)
      end

      # Pattern: "8115r1" (with no edition_dash_year)
      #   first_number="8115r1", no second_number
      # Result: first_number=Code("8115"), edition_with_year=Edition(type: "r", id: "1")
      #
      # Skipped when :second_number is present; per the original notes the
      # compound-number logic in the Builder handles that pattern instead.
      def extract_embedded_edition_without_dash_year(parsed_hash)
        return if parsed_hash[:second_number] || parsed_hash[:edition_dash_year]

        extract_embedded_edition(parsed_hash)
      end

      # Shared worker for the two extract_embedded_edition_* steps.
      #
      # When :first_number looks like "<digits><letter><digits>", replaces it
      # with a Code holding only the leading digits and stores an Edition
      # (type = the letter, downcased; id = the trailing digits) under
      # :edition_with_year.
      #
      # @param parsed_hash [Hash] parser output (mutated in place)
      # @param edition_extras [Hash] extra keyword attributes for the Edition
      # @return [Boolean] true when an embedded edition was extracted
      def extract_embedded_edition(parsed_hash, edition_extras = {})
        # nil.to_s is "", which never matches, so a missing :first_number is a no-op.
        match = EMBEDDED_EDITION_PATTERN.match(parsed_hash[:first_number].to_s)
        return false unless match

        base_number, type_letter, edition_id = match.captures
        parsed_hash[:first_number] = Components::Code.new(value: base_number)
        parsed_hash[:edition_with_year] = Components::Edition.new(
          type: type_letter.downcase,
          id: edition_id,
          **edition_extras,
        )
        true
      end

      # Pattern: "105-1-1990"
      # Parser returns second_number_edition_year={second_number:"1", dash_year:"1990"}
      # Result: :second_number="1", plus either :edition_from_year (HB series
      # with a four-digit dash year) or :edition_dash_year (everything else)
      # for the later disambiguation steps.
      def split_second_number_edition_year(parsed_hash)
        combined = parsed_hash[:second_number_edition_year]
        return unless combined

        parsed_hash[:second_number] = combined[:second_number]
        dash_year = combined[:dash_year]

        handbook = safely_to_s(parsed_hash[:series]) == "HB"
        if handbook && dash_year.to_s.match?(FOUR_DIGITS_PATTERN)
          parsed_hash[:edition_from_year] = Components::Edition.new(type: "e", id: dash_year)
        else
          parsed_hash[:edition_dash_year] = { dash_year: dash_year }
        end

        parsed_hash.delete(:second_number_edition_year)
      end

      # Pattern: "11-1-Sep1977"
      # Parser returns fips_month_year_after_part=
      #   {second_number:"1", edition_month:"Sep", edition_year:"1977"}
      # Result: :second_number="1", :edition_from_year=Edition(type:"e", id:"197709").
      # When the month cannot be resolved to 1..12 the edition id is the year alone.
      def split_fips_month_year_after_part(parsed_hash)
        combined = parsed_hash[:fips_month_year_after_part]
        return unless combined

        parsed_hash[:second_number] = combined[:second_number]
        year = combined[:edition_year]
        month = month_to_number(combined[:edition_month])
        edition_id = month ? "#{year}#{format('%02d', month)}" : year

        parsed_hash[:edition_from_year] = Components::Edition.new(type: "e", id: edition_id)
        parsed_hash.delete(:fips_month_year_after_part)
      end

      # Pattern: "84-2946" with series=IR
      # Applies only to the IR series when :first_number is exactly two digits
      # and the dash year is exactly four digits:
      #   - dash year inside VALID_YEAR_RANGE and no :edition_e present:
      #     keep first_number as-is (wrapped in Code) and store a year-only
      #     Edition under :edition;
      #   - otherwise: join both parts into one compound Code
      #     ("84-2946").
      # :edition_dash_year is removed in both cases.
      def disambiguate_ir_compound_vs_edition(parsed_hash)
        return unless safely_to_s(parsed_hash[:series]) == "IR"
        return unless parsed_hash[:first_number] && parsed_hash[:edition_dash_year]

        first_num = parsed_hash[:first_number].to_s
        dash_year = parsed_hash[:edition_dash_year][:dash_year].to_s
        return unless first_num.match?(TWO_DIGITS_PATTERN) && dash_year.match?(FOUR_DIGITS_PATTERN)

        if VALID_YEAR_RANGE.cover?(dash_year.to_i) && !parsed_hash[:edition_e]
          parsed_hash[:first_number] = Components::Code.new(value: first_num)
          parsed_hash[:edition] = Components::Edition.new(type: "e", id: dash_year)
        else
          parsed_hash[:first_number] = Components::Code.new(value: "#{first_num}-#{dash_year}")
        end
        parsed_hash.delete(:edition_dash_year)
      end

      # Pattern: "250-1039" or "15-1000" or "1946-1947" (RPT)
      # When the parser captured a :first_number plus :edition_dash_year, the
      # dash year is interpreted per series:
      #   - RPT: always join into a compound number (date range)
      #   - GCR: always convert the dash year to a year-only edition
      #   - IR, HB, CS, FIPS: convert to a year-only edition only if the dash
      #     year is inside VALID_YEAR_RANGE
      #   - otherwise, if the dash year is < 1900: move it to :second_number
      #   - anything else: leave both keys untouched for the Builder
      #
      # NOTE(review): a dash year of exactly 1900 is neither inside
      # VALID_YEAR_RANGE nor < 1900, so it always falls into the "leave
      # untouched" case -- confirm this is intended.
      def disambiguate_dash_year(parsed_hash)
        return unless parsed_hash[:first_number] && parsed_hash[:edition_dash_year]
        # An embedded edition ("44e2") is not a plain number; nothing to decide here.
        return if parsed_hash[:first_number].to_s.match?(EMBEDDED_EDITION_PATTERN)

        dash_year = parsed_hash[:edition_dash_year][:dash_year].to_s
        series = safely_to_s(parsed_hash[:series])
        year_number = dash_year.to_i
        year_edition_series = series == "IR" || DASH_YEAR_AS_EDITION_SERIES.include?(series)

        if series == "RPT"
          # RPT date ranges: "1946" + "1947" -> "1946-1947"
          parsed_hash[:first_number] =
            Components::Code.new(value: "#{parsed_hash[:first_number]}-#{dash_year}")
          parsed_hash.delete(:edition_dash_year)
        elsif series == "GCR" || (year_edition_series && VALID_YEAR_RANGE.cover?(year_number))
          stash_edition_from_year(parsed_hash, dash_year)
        elsif year_number < 1900
          parsed_hash[:second_number] = dash_year
          parsed_hash.delete(:edition_dash_year)
        end
      end

      # Convert the dash year into a year-only Edition stored under
      # :edition_from_year and remove :edition_dash_year.
      def stash_edition_from_year(parsed_hash, dash_year)
        parsed_hash[:edition_from_year] = Components::Edition.new(type: "e", id: dash_year)
        parsed_hash.delete(:edition_dash_year)
      end

      # Coerce an arbitrary value to a String, returning "" if #to_s raises
      # a StandardError.
      def safely_to_s(value)
        value.to_s
      rescue StandardError
        ""
      end

      # Convert a month abbreviation ("Sep"), full name ("September") or
      # numeric string ("9") to a 1-based month number.
      #
      # @param month_str [#to_s] month token from the parser
      # @return [Integer, nil] 1..12, or nil when the value is not a
      #   recognizable month (including nil, "" and out-of-range numbers)
      def month_to_number(month_str)
        name = month_str.to_s
        number = Date::ABBR_MONTHNAMES.index(name) ||
          Date::MONTHNAMES.index(name) ||
          name.to_i
        number if (1..12).cover?(number)
      end
    end
  end
end
@@ -0,0 +1,416 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pubid
4
+ module Nist
5
+ # Owns all regex-based normalization applied to NIST identifier strings
6
+ # before the Parslet grammar sees them.
7
+ #
8
+ # The Parser entry point delegates to Preprocessor#call; the grammar
9
+ # itself never inspects raw user input. Each private method below is a
10
+ # named stage of normalization, applied in the order declared in #call.
11
+ # Stages are kept in the historically validated sequence — reordering
12
+ # them risks regressions because later stages often match patterns
13
+ # produced by earlier ones.
14
+ #
15
+ # Format detection (:mr vs :short) is also owned here because it is a
16
+ # property of the original input, not of the parsed tree.
17
+ class Preprocessor
18
+ # Outcome of preprocessing.
19
+ # cleaned - the normalized identifier string ready for the grammar
20
+ # format - :mr if the input uses dot-separators, :short otherwise
21
+ Result = Struct.new(:cleaned, :format, keyword_init: true)
22
+
23
+ # Convert Roman numerals to Arabic numbers per NIST spec.
24
+ ROMAN_TO_ARABIC = {
25
+ "I" => "1",
26
+ "II" => "2",
27
+ "III" => "3",
28
+ "IV" => "4",
29
+ "V" => "5",
30
+ "VI" => "6",
31
+ "VII" => "7",
32
+ "VIII" => "8",
33
+ "IX" => "9",
34
+ "X" => "10",
35
+ }.freeze
36
+
37
# Keep the stripped, stringified input in @input (used later for format
# detection) and seed @cleaned with the result of passing that same string
# through Core::UpdateCodes.apply for the :nist flavor.
def initialize(input)
  trimmed = input.to_s.strip
  @input = trimmed
  @cleaned = Core::UpdateCodes.apply(trimmed, :nist)
end
41
+
42
# Execute all normalization stages, then package the outcome.
#
# Returns a Result carrying the normalized string (:cleaned) and the
# detected input format (:format). Stage ordering lives in #run_stages and
# is load-bearing: later stages match patterns produced by earlier ones.
def call
  run_stages
  normalized = @cleaned
  Result.new(cleaned: normalized, format: detected_format)
end
51
+
52
# Invoke every normalization stage, one after another, in the historically
# validated order listed below. Do not reorder: later stages match text
# produced by earlier ones.
#
# Returns the value of the final stage, as a plain sequence of calls would.
def run_stages
  %i[
    normalize_publisher_and_series!
    normalize_lcirc_supplement_contexts!
    normalize_revision_spacing!
    normalize_letter_suffix_casing!
    normalize_draft_and_volume!
    convert_roman_volumes!
    normalize_supplement_and_part!
    normalize_version_notation!
    normalize_edition_year_suffix!
    normalize_revision_with_letter!
    normalize_version_dotted_spaces!
    normalize_update_markers!
    normalize_supplement_variants!
    normalize_revision_language!
    normalize_mr_translation_codes!
    convert_dashyear_to_edition!
    revert_dashyear_for_series!
    normalize_version_verbose!
    normalize_part_notation!
    normalize_series_specific_spacing!
    normalize_verbose_keywords!
  ].reduce(nil) { |_previous, stage| send(stage) }
end
79
+
80
# Classify the original (pre-normalization) input: :mr when it contains a
# dot and no whitespace at all, :short in every other case.
def detected_format
  return :short unless @input.include?(".")

  @input.match?(/\s/) ? :short : :mr
end
84
+
85
+ private
86
+
87
# Canonicalize the publisher / series prefix of the identifier:
#
# 1. A leading "nbs" / "nist" in any letter case becomes "NBS" / "NIST".
# 2. A leading publisher fused with a series code ("NBSIR", "nistir", ...)
#    is split with a space. Both halves are upcased, because the pattern is
#    case-insensitive and re-emitting the raw captures would leave input
#    such as "nistir 8000" as "nist IR 8000" with a lowercase publisher.
# 3. The first standalone series code among ir/sp/tn/hb/fips/ams/vts is
#    upcased.
# 4. A standalone "LC" is expanded to "LCIRC".
def normalize_publisher_and_series!
  @cleaned = @cleaned.sub(/^nbs\b/i, "NBS")
  @cleaned = @cleaned.sub(/^nist\b/i, "NIST")
  @cleaned = @cleaned.gsub(
    /^(NBS|NIST)(IR|FIPS|GCR|HB|MONO|MP|NCSTAR|NSRDS)/i,
  ) do
    publisher, series = Regexp.last_match.captures
    "#{publisher.upcase} #{series.upcase}"
  end
  @cleaned = @cleaned.sub(/\b(ir|sp|tn|hb|fips|ams|vts)\b/i, &:upcase)
  @cleaned = @cleaned.gsub(/\bLC\b(?!IRC)/, "LCIRC")
end
98
+
99
# LCIRC handling in three passes:
# - "NBS LCIRC" is joined to "NBS.LCIRC" when a supplement marker (sup,
#   supp, sup+) or an "r<digits>/" revision followed by a digit appears
#   later in the string;
# - "NBS.LCIRC.<n>r<m>/<yyyy>" has its dots turned back into spaces;
# - so does "NBS.LCIRC.<n>r<m>" without the slash-year.
# Per the original note, this lets the grammar's circ_supplement_identifier
# rule match.
def normalize_lcirc_supplement_contexts!
  joined = @cleaned.gsub(
    /\bNBS LCIRC\b(?=.*\b(?:supp?|sup\+|r\d+\/)\d)/, "NBS.LCIRC"
  )
  with_slash_year = joined.gsub(
    /\bNBS\.LCIRC\.(\d+r\d+\/\d{4})/, "NBS LCIRC \\1"
  )
  @cleaned = with_slash_year.gsub(/\bNBS\.LCIRC\.(\d+r\d+)\b/, "NBS LCIRC \\1")
end
111
+
112
# Put a separator between a number and the revision marker glued to it.
#
# Order matters:
# 1. Roman-numeral compound followed by a spaced number gets a dot.
# 2. "<digit>rev<yyyy>" gets a space (unless the digit follows an "e").
# 3. Dotted edition-date ("1e2.Jan2020") folds back to the "rev" form so
#    the library can re-read its own output.
# 4. IR slash-year revisions become Update markers; this must happen before
#    step 5 so no space is inserted into them.
# 5. "<digit>r<n>/<yyyy>" gets a space, except when the string mentions
#    CIRC (which also covers LCIRC) -- those keep the compact form.
# 6. A word-boundary "r<yyyy>" and "<digit>r<Month><yyyy>" get a space.
def normalize_revision_spacing!
  @cleaned = @cleaned
    .gsub(/([-\d]+[IVX]+[-\d]+)\s+(\d+)/, '\1.\2')
    .gsub(/(?<!e)(\d)(rev\d{4})/, '\1 \2')
    .gsub(/(\d+e\d+)\.([A-Za-z]{3,9}\d{4})/, '\1rev\2')

  normalize_ir_slash_year_to_update!

  # "LCIRC" contains "CIRC", so one substring test covers both series.
  @cleaned = @cleaned.gsub(/(\d)(r\d+\/\d{4})/, '\1 \2') unless @cleaned.include?("CIRC")

  @cleaned = @cleaned
    .gsub(/\b(r(?!v)\d{4})\b/, ' \1')
    .gsub(/(\d)(r[A-Z][a-z]{2,8}\d{4})/, '\1 \2')
end
131
+
132
# Rewrite an IR "r<month>/<year>" revision as an Update marker, e.g.
# "5058r04/98" -> "5058/Upd1-199804" (mirrors the archived v1 NistIr
# parser, per the original note).
#
# Only runs when the string contains a standalone "IR" and does not mention
# CIRC (which also covers LCIRC). A two-digit year is expanded to 19YY and
# the month is zero-padded to two digits.
def normalize_ir_slash_year_to_update!
  return if @cleaned.include?("CIRC")
  return unless @cleaned.match?(/\bIR\b/)

  @cleaned = @cleaned.gsub(%r{(\d)r(\d{1,2})/(\d{2,4})}) do
    digit, month, year = Regexp.last_match.captures
    full_year = year.length == 2 ? "19#{year}" : year
    "#{digit}/Upd1-#{full_year}#{format('%02d', month.to_i)}"
  end
end
144
+
145
# Run the letter-suffix upcasing helpers in order. The final helper
# (letter before a volume marker) is skipped when the string mentions
# NCSTAR, which keeps lowercase volume letters such as "1-1av1".
def normalize_letter_suffix_casing!
  uppercase_dash_letter!
  uppercase_trailing_letter!
  uppercase_revision_letter!
  uppercase_letter_before_revision!
  return if @cleaned.include?("NCSTAR")

  uppercase_letter_before_volume!
end
154
+
155
# Upcase a single lowercase letter that follows "<digit>-" at the end of a
# line: "800-38-a" -> "800-38-A".
def uppercase_dash_letter!
  @cleaned = @cleaned.gsub(/(\d)-([a-z])$/) do
    digit, letter = Regexp.last_match.captures
    "#{digit}-#{letter.upcase}"
  end
end
159
+
160
# Upcase a single lowercase letter glued to a digit at the end of a line:
# "800-38a" -> "800-38A". The letter "r" is deliberately excluded so a
# trailing revision marker such as "73-197r" is left alone.
def uppercase_trailing_letter!
  @cleaned = @cleaned.gsub(/(\d)([a-z&&[^r]])$/) do
    digit, letter = Regexp.last_match.captures
    "#{digit}#{letter.upcase}"
  end
end
165
+
166
# Upcase a lowercase letter that trails a revision number at the end of a
# line: "22r1a" -> "22r1A".
def uppercase_revision_letter!
  @cleaned = @cleaned.gsub(/(\d)(r)(\d+)([a-z])$/) do
    digit, marker, revision, letter = Regexp.last_match.captures
    "#{digit}#{marker}#{revision}#{letter.upcase}"
  end
end
172
+
173
# Upcase a lowercase letter sandwiched between a digit and an "r<digit>"
# revision marker: "53ar1" -> "53Ar1".
def uppercase_letter_before_revision!
  @cleaned = @cleaned.gsub(/(\d)([a-z])(r\d)/) do
    digit, letter, revision = Regexp.last_match.captures
    "#{digit}#{letter.upcase}#{revision}"
  end
end
179
+
180
# Upcase a lowercase letter (any except "r") sandwiched between a digit and
# a "v<digits>" volume marker: "1-2bv1" -> "1-2Bv1". The caller skips this
# step for NCSTAR identifiers.
def uppercase_letter_before_volume!
  @cleaned = @cleaned.gsub(/(\d)([a-z&&[^r]])(v\d+)/) do
    digit, letter, volume = Regexp.last_match.captures
    "#{digit}#{letter.upcase}#{volume}"
  end
end
187
+
188
# Early volume / draft / supplement fix-ups, applied in this order:
# - "NN-NNNN <d>" at end of line -> "NN-NNNN v<d>";
# - "<d>-draft<d>" and "<d>draft<d>" -> "<d> -draft <d>";
# - the "suprev" typo after a digit -> "supprev";
# - "<nn><L>r<n>-draft" gets a space between the letter and the revision.
# These must run before the more general draft and supplement stages.
def normalize_draft_and_volume!
  rules = [
    [/(\d{2}-\d{4})\s+(\d)$/, '\1 v\2'],
    [/(\d)-draft(\d)/, '\1 -draft \2'],
    [/(\d)draft(\d)/, '\1 -draft \2'],
    [/(\d)suprev/, '\1supprev'],
    [/(\d{2,})([A-Z])(r\d+)([-\s]draft\d*)/, '\1\2 \3\4'],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
199
+
200
# Turn "<n>-<ROMAN>-<version>" into "<n> v<arabic> ver<version>", with the
# Roman numeral translated by #roman_to_arabic.
def convert_roman_volumes!
  @cleaned = @cleaned.gsub(/(\d+)-([IVX]+)-(\d+(?:\.\d+)*)/) do
    number, roman, version = Regexp.last_match.captures
    "#{number} v#{roman_to_arabic(roman)} ver#{version}"
  end
end
207
+
208
# Two spacing fixes:
# - "<d>supp<n>/<yyyy>" gets a space before "supp";
# - "<d>Pt<n>r<m>" becomes "<d> pt<n> r<m>".
def normalize_supplement_and_part!
  spaced_supplement = @cleaned.gsub(/(\d)(supp\d+\/\d{4})/, '\1 \2')
  @cleaned = spaced_supplement.gsub(/(\d)Pt(\d+)(r\d+)/, '\1 pt\2 \3')
end
214
+
215
# Version / volume notation clean-up: insert spaces between digits and
# "ver" / "v" markers, split fused fields ("ver2e2019", "ver1v2"), join
# space-separated version components with dots, and lowercase the letters
# of an uppercase volume range ("v1A-B" -> "v1a-b").
def normalize_version_notation!
  @cleaned = @cleaned.gsub(/(\d)ver(\d)/, '\1 ver \2')
  @cleaned = @cleaned.gsub(/ver(\d+)e(\d{4})/, 'ver\1 e\2')
  @cleaned = @cleaned.gsub(/ver(\d+)v(\d+)/, 'ver\1 v\2')
  @cleaned = @cleaned.gsub(/(\d)(v\d+\.\d+)/, '\1 \2')
  # Deliberately repeated: gsub never rescans text it already consumed, so
  # a second pass is what catches a match whose leading digit was the tail
  # of the previous match (e.g. "1v2.3v4.5").
  @cleaned = @cleaned.gsub(/(\d)(v\d+\.\d+)/, '\1 \2')
  @cleaned = @cleaned.gsub(/(\d)(v\d+)\s+(\d+)$/, '\1 \2.\3')
  @cleaned = @cleaned.gsub(/(\d)(v\d+)\s+(\d+)\s+(\d+)$/, '\1 \2.\3.\4')
  @cleaned = @cleaned.gsub(/(\d)(v\d+[a-z]-[a-z])/, '\1 \2')
  @cleaned = @cleaned.gsub(/(\d)(v\d+[A-Z])/, '\1 \2')
  # Downcase the matched text itself. Calling #downcase on the replacement
  # template ('\1\2-\3') has no effect because the template holds no
  # letters, which made this rule a no-op.
  @cleaned = @cleaned.gsub(/(v\d+)([A-Z])-([A-Z])/) do
    Regexp.last_match(0).downcase
  end
end
229
+
230
# Rewrite the "<yyyy>ed." shorthand as an edition marker:
# "2006ed." -> "e2006".
def normalize_edition_year_suffix!
  year_then_ed = /(\d{4})ed\./
  @cleaned = @cleaned.gsub(year_then_ed, 'e\1')
end
234
+
235
# Two passes over "<number>r<1-2 digits>" revisions:
# 1. When a lowercase letter follows the revision and is itself followed by
#    "-", an uppercase letter, or end of line, upcase that letter and keep
#    everything fused ("22r1a" -> "22r1A"); per the original note this
#    serves the second_number grammar rule.
# 2. When no letter follows the revision, insert a space between the number
#    and the revision if what comes next is "-<Uppercase>" or one of
#    "/upd", "/errata", "/insert".
def normalize_revision_with_letter!
  @cleaned = @cleaned.gsub(/(\d+)(r\d{1,2})([a-z])(?=-|[A-Z]|$)/) do
    number, revision, letter = Regexp.last_match.captures
    "#{number}#{revision}#{letter.upcase}"
  end
  # rubocop:disable Layout/LineLength
  @cleaned = @cleaned.gsub(/(\d+)(r\d{1,2})(?![a-zA-Z])(?=[A-Z]|-(?=[A-Z])|\/(?:upd|errata|insert))/, '\1 \2')
  # rubocop:enable Layout/LineLength
end
250
+
251
# Join space-separated numeric components with dots ("v1 1" -> "v1.1",
# "v1 2 3" -> "v1.2.3"). The three-component form is tried first, then the
# two-component form. The negative lookaheads keep a number that is
# directly followed by a draft stage (pd / wd / prd, any case) from being
# swallowed, e.g. "189 2pd".
def normalize_version_dotted_spaces!
  # rubocop:disable Layout/LineLength
  three_components = /(\b(?:v|\d)[v\d]*[-A-Z]*)\s+(\d+)(?!(?i:pd|wd|prd)\b)\s+(\d+)(?!(?i:pd|wd|prd)\b)/
  two_components = /(\b(?:v|\d)[v\d]*)\s+(\d+)(?!(?i:pd|wd|prd)\b)/
  # rubocop:enable Layout/LineLength
  @cleaned = @cleaned
    .gsub(three_components, '\1.\2.\3')
    .gsub(two_components, '\1.\2')
end
259
+
260
# Insert a space in front of "-upd" / "/upd" when the marker is glued to a
# number, to a lowercase-letter+number token (e.g. "r4"), or to a number
# with an uppercase letter suffix (e.g. "53A"). Per the original note, the
# grammar's update rule needs that space.
def normalize_update_markers!
  rules = [
    [/(\d+)-upd(\d*)/, '\1 -upd\2'],
    [/(\d+)\/upd(\d*)/, '\1 /upd\2'],
    [/([a-z]\d+)-upd/, '\1 -upd'],
    [/([a-z]\d+)\/upd/, '\1 /upd'],
    [/(\d+[A-Z])-upd(\d*)/, '\1 -upd\2'],
    [/(\d+[A-Z])\/upd(\d*)/, '\1 /upd\2'],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
270
+
271
# Supplement marker clean-up, applied rule by rule in this order:
# - "sup<d>", "sup..p<d>" and "sup+<d>" glued to a digit get a space in
#   front (several overlapping rules are kept as-is and run in sequence);
# - "<n><L>sup" at a word boundary becomes "<n><L>supp";
# - "<n>sup<m>/<yyyy>" becomes "<n>supp<m>/<yyyy>";
# - "<d>sup-<yyyy>" / "<d>supp-<yyyy>" (not followed by a digit or "/")
#   becomes "<d>supp<yyyy>".
def normalize_supplement_variants!
  rules = [
    [/(\d)(sup\d)/, '\1 \2'],
    [/(\d)(sup+)(\d)/, '\1 \2\3'],
    [/(\d)(sup\+)(\d)/, '\1 \2\3'],
    [/(\d)(sup\d+)/, '\1 \2'],
    [/(\d)(sup\d+\b)/, '\1 \2'],
    [/(\d+[A-Z])sup(\b)/, '\1supp\2'],
    [/(\d+)sup(\d+\/\d{4})/, '\1supp\2'],
    [/(\d)(supp?)-(\d{4})(?![\d\/])/, '\1supp\3'],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
284
+
285
# Three revision fix-ups:
# - "<d><letter>r" at a word boundary gets a space before the "r";
# - a bare "r" right after a digit at the very end of the string becomes
#   "r1" (the V1 empty-revision normalization);
# - a revision directly followed by a language code (es, pt, chi, viet,
#   port, esp) gets a space between them.
def normalize_revision_language!
  text = @cleaned.gsub(/(\d[a-z])r\b/, '\1 r')
  text = text.gsub(/(\d)r\z/, '\1r1')
  @cleaned = text.gsub(/(r\d+)(es|pt|chi|viet|port|esp)\b/, '\1 \2')
end
295
+
296
# In a dot-separated identifier of the exact shape
# "<PUBLISHER>.<SERIES>.<digits>.<2-4 lowercase letters>", replace the last
# dot with a space so the trailing code (e.g. "spa", "por", "ind") is not
# misparsed as a letter suffix. The SP-specific rule runs first, then the
# rule for any uppercase series.
def normalize_mr_translation_codes!
  sp_fixed = @cleaned.gsub(
    /^([A-Z]+)\.SP\.(\d+)\.([a-z]{2,4})$/, '\1.SP.\2 \3'
  )
  @cleaned = sp_fixed.gsub(
    /^([A-Z]+)\.([A-Z]+)\.(\d+)\.([a-z]{2,4})$/, '\1.\2.\3 \4'
  )
end
306
+
307
# Convert a trailing "-YYYY" (followed by whitespace or end of line) into
# an "eYYYY" edition marker, but only when YYYY lies in 1901..2099; other
# four-digit groups (e.g. "SP 250-1039") are returned unchanged.
#
# The digit before the dash may carry one letter suffix in either case
# (e.g. "800-38b-2005"); it is upcased so the result reads "800-38Be2005".
# "e" / "E" are excluded from that letter, and the lookbehinds skip
# positions preceded by "e<digit>", "e", "E" or "-", so an existing edition
# marker is not matched again.
def convert_dashyear_to_edition!
  dash_year = /(?<!e\d)(?<![eE-])(\d(?:[A-DF-Za-df-z]?))-(\d{4})(?=\s|$)/
  @cleaned = @cleaned.gsub(dash_year) do |whole|
    lead, digits = Regexp.last_match.captures
    year = digits.to_i
    next whole unless year.between?(1901, 2099)

    "#{lead.upcase}e#{year}"
  end
end
326
+
327
# Undo the generic dash-year -> edition conversion for the series where the
# dash-year is structural rather than an edition marker: HB handbooks,
# OWMWP dates and RPT year ranges, in that order.
def revert_dashyear_for_series!
  revert_handbook_edition!
  revert_owmwp_date!
  revert_report_year_range!
end
335
+
336
# HB handbooks: turn "<number>e<yyyy>" back into "<number>-<yyyy>", e.g.
# "HB 130e1979" -> "HB 130-1979", because the year is part of the handbook
# designation rather than an edition marker.
#
# Any characters between the "HB" prefix and the final number (such as the
# "150-" in "HB 150-10e1995") are captured and re-emitted. Leaving that
# segment uncaptured dropped it from the result ("HB 10-1995").
def revert_handbook_edition!
  @cleaned = @cleaned.gsub(
    /\b(HB|HB\s+)([^:\s.]*?)(\d+)e(\d{4})(?=\s|$)/, '\1\2\3-\4'
  )
end
343
+
344
# OWMWP series: dates use MM-DD-YYYY, so "OWMWP 06-13e2018" is reverted to
# "OWMWP 06-13-2018".
#
# Any characters between the "OWMWP" prefix and the MM-DD pair are captured
# and re-emitted instead of being silently dropped, in line with how the
# RPT year-range revert keeps its separator.
def revert_owmwp_date!
  @cleaned = @cleaned.gsub(
    /\b(OWMWP|OWMWP\s*)([^:\s]*?)(\d{2})-(\d{2})e(\d{4})(?=\s|$)/,
    '\1\2\3-\4-\5',
  )
end
352
+
353
# RPT series: a "YYYYeYYYY" produced by the generic dash-year conversion is
# handed to #build_report_year_range, which decides whether to restore the
# "YYYY-YYYY" range form or keep the match as it is.
def revert_report_year_range!
  year_pair = /\b(RPT|RPT\s*)([^:\s]*?)(\d{4})e(\d{4})(?=\s|$)/
  @cleaned = @cleaned.gsub(year_pair) do |matched|
    build_report_year_range(matched, Regexp.last_match.captures)
  end
end
360
+
361
# Given the full match and its four captures (prefix, separator, first
# year, second year), return "<prefix><separator><first>-<second>" when the
# first year is numerically smaller than the second; otherwise return the
# original match untouched.
def build_report_year_range(match, captures)
  prefix, separator, first, second = captures
  forward_range = first.to_i < second.to_i
  forward_range ? "#{prefix}#{separator}#{first}-#{second}" : match
end
369
+
370
# Canonicalize version markers to the "ver" form:
# - "-v<a>.<b>" becomes ".ver<a>.<b>";
# - "Ver. <n>" (with optional dotted parts) becomes "ver<n>";
# - a word-boundary "v<a>.<b>[.<c>...]" becomes "ver<a>.<b>[.<c>...]".
def normalize_version_verbose!
  @cleaned = @cleaned
    .gsub(/-v(\d+\.\d+)/, '.ver\1')
    .gsub(/\bVer\.\s+(\d+(?:\.\d+)*)/, 'ver\1')
    .gsub(/\bv(\d+\.\d+(?:\.\d+)*)/, 'ver\1')
end
377
+
378
# Part notation, applied in this order:
# - "<d>P<d>" becomes "<d> p<d>";
# - a word-boundary "p<n>" or "n<n>" becomes "pt<n>" (the pattern carries a
#   negative lookahead for a following four-digit group);
# - any remaining "p<n>" / "P<n>" glued to a digit gets a space in front.
def normalize_part_notation!
  @cleaned = @cleaned
    .gsub(/(\d)P(\d)/, '\1 p\2')
    .gsub(/\b([pn])(\d+)(?!\d{4}\b)/, 'pt\2')
    .gsub(/(\d)([pP]\d+)/, '\1 \2')
end
385
+
386
# Series-specific spacing:
# - "CRPL-F-A<d>" / "CRPL-F-B<d>" (with or without the "NBS " prefix) gets
#   a space after the letter band;
# - a compound number with a fused volume ("17-917v3") has the volume
#   split off, unless the volume is followed by a dot or another digit.
def normalize_series_specific_spacing!
  rules = [
    [/(NBS CRPL-F-[AB])(\d)/, '\1 \2'],
    [/(CRPL-F-[AB])(\d)/, '\1 \2'],
    [/(\d+-\d+)(v\d+)(?![.\d])/, '\1 \2'],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
394
+
395
# Shorten verbose keyword spellings, in this order:
# - "<n> Suppl" loses the space ("<n>Suppl");
# - " Version <n>" becomes " ver <n>";
# - " Revision (r)" becomes " r";
# - " Part <n>" becomes "pt<n>" (the leading whitespace is consumed);
# - "<d>[letter] Add" / "add." (any case) becomes "<D>[LETTER] Add.";
# - "<n> rev <yyyy>" becomes "<n>r<yyyy>";
# - "report ;" and a standalone "report" become "RPT".
def normalize_verbose_keywords!
  text = @cleaned
    .gsub(/(\d+)\s+Suppl\b/, '\1Suppl')
    .gsub(/\s+Version\s+(\d+)/, ' ver \1')
    .gsub(/\s+Revision\s+\(r\)/, " r")
    .gsub(/\s+Part\s+(\d+)/, 'pt\1')
  text = text.gsub(/(\d[a-z]?)\s+Add\b\.?/i) do
    "#{Regexp.last_match(1).upcase} Add."
  end
  @cleaned = text
    .gsub(/(\d+)\s+rev\s+(\d{4})/, '\1r\2')
    .gsub(/\breport\s*;\s*/, "RPT ")
    .gsub(/\breport\b/, "RPT")
end
409
+
410
# Look the Roman numeral up in ROMAN_TO_ARABIC and return its Arabic
# string; a numeral missing from the table is returned unchanged.
def roman_to_arabic(roman)
  ROMAN_TO_ARABIC.fetch(roman) { roman }
end
414
+ end
415
+ end
416
+ end