pubid 1.15.19 → 2.0.0.pre.alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (604)
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +1 -1
  3. data/README.adoc +2041 -53
  4. data/archived-gems/pubid-ccsds/update_codes.yaml +1 -0
  5. data/archived-gems/pubid-iec/stages.yaml +129 -0
  6. data/archived-gems/pubid-iec/update_codes.yaml +67 -0
  7. data/archived-gems/pubid-ieee/update_codes.yaml +104 -0
  8. data/archived-gems/pubid-iso/stages.yaml +106 -0
  9. data/archived-gems/pubid-iso/update_codes.yaml +4 -0
  10. data/archived-gems/pubid-itu/i18n.yaml +13 -0
  11. data/archived-gems/pubid-itu/series.yaml +42 -0
  12. data/archived-gems/pubid-nist/publishers.yaml +6 -0
  13. data/archived-gems/pubid-nist/series.yaml +121 -0
  14. data/archived-gems/pubid-nist/stages.yaml +16 -0
  15. data/archived-gems/pubid-nist/update_codes.yaml +93 -0
  16. data/archived-gems/pubid-plateau/update_codes.yaml +6 -0
  17. data/data/ccsds/update_codes.yaml +1 -0
  18. data/data/iec/update_codes.yaml +67 -0
  19. data/data/ieee/update_codes.yaml +104 -0
  20. data/data/iso/update_codes.yaml +21 -0
  21. data/data/nist/update_codes.yaml +89 -0
  22. data/data/plateau/update_codes.yaml +6 -0
  23. data/lib/pubid/amca/builder.rb +176 -0
  24. data/lib/pubid/amca/identifier.rb +57 -0
  25. data/lib/pubid/amca/identifiers/base.rb +64 -0
  26. data/lib/pubid/amca/identifiers/interpretation.rb +51 -0
  27. data/lib/pubid/amca/identifiers/publication.rb +47 -0
  28. data/lib/pubid/amca/identifiers/standard.rb +22 -0
  29. data/lib/pubid/amca/identifiers.rb +12 -0
  30. data/lib/pubid/amca/parser.rb +153 -0
  31. data/lib/pubid/amca/scheme.rb +16 -0
  32. data/lib/pubid/amca/single_identifier.rb +33 -0
  33. data/lib/pubid/amca/urn_generator.rb +50 -0
  34. data/lib/pubid/amca.rb +26 -0
  35. data/lib/pubid/ansi/builder.rb +52 -0
  36. data/lib/pubid/ansi/identifier.rb +55 -0
  37. data/lib/pubid/ansi/identifiers/american_national_standard.rb +12 -0
  38. data/lib/pubid/ansi/identifiers/standard.rb +16 -0
  39. data/lib/pubid/ansi/identifiers.rb +11 -0
  40. data/lib/pubid/ansi/parser.rb +91 -0
  41. data/lib/pubid/ansi/scheme.rb +15 -0
  42. data/lib/pubid/ansi/single_identifier.rb +45 -0
  43. data/lib/pubid/ansi/urn_generator.rb +76 -0
  44. data/lib/pubid/ansi.rb +27 -0
  45. data/lib/pubid/api/builder.rb +85 -0
  46. data/lib/pubid/api/components/code.rb +9 -0
  47. data/lib/pubid/api/identifier.rb +68 -0
  48. data/lib/pubid/api/identifiers/base.rb +24 -0
  49. data/lib/pubid/api/identifiers/bulletin.rb +15 -0
  50. data/lib/pubid/api/identifiers/continuous_operations_standard.rb +15 -0
  51. data/lib/pubid/api/identifiers/mpms.rb +44 -0
  52. data/lib/pubid/api/identifiers/publication.rb +15 -0
  53. data/lib/pubid/api/identifiers/recommended_practice.rb +15 -0
  54. data/lib/pubid/api/identifiers/specification.rb +15 -0
  55. data/lib/pubid/api/identifiers/standard.rb +15 -0
  56. data/lib/pubid/api/identifiers/technical_report.rb +15 -0
  57. data/lib/pubid/api/identifiers/typeless_standard.rb +27 -0
  58. data/lib/pubid/api/parser.rb +140 -0
  59. data/lib/pubid/api/scheme.rb +66 -0
  60. data/lib/pubid/api/single_identifier.rb +46 -0
  61. data/lib/pubid/api/urn_generator.rb +41 -0
  62. data/lib/pubid/api.rb +17 -0
  63. data/lib/pubid/ashrae/builder.rb +498 -0
  64. data/lib/pubid/ashrae/identifier.rb +57 -0
  65. data/lib/pubid/ashrae/identifiers/addenda_package.rb +46 -0
  66. data/lib/pubid/ashrae/identifiers/addendum.rb +55 -0
  67. data/lib/pubid/ashrae/identifiers/base.rb +23 -0
  68. data/lib/pubid/ashrae/identifiers/combined_addenda.rb +51 -0
  69. data/lib/pubid/ashrae/identifiers/errata.rb +40 -0
  70. data/lib/pubid/ashrae/identifiers/guideline.rb +38 -0
  71. data/lib/pubid/ashrae/identifiers/interpretation.rb +39 -0
  72. data/lib/pubid/ashrae/identifiers/standard.rb +38 -0
  73. data/lib/pubid/ashrae/identifiers.rb +16 -0
  74. data/lib/pubid/ashrae/parser.rb +724 -0
  75. data/lib/pubid/ashrae/scheme.rb +53 -0
  76. data/lib/pubid/ashrae/single_identifier.rb +23 -0
  77. data/lib/pubid/ashrae/supplement_identifier.rb +23 -0
  78. data/lib/pubid/ashrae/urn_generator.rb +59 -0
  79. data/lib/pubid/ashrae.rb +21 -0
  80. data/lib/pubid/asme/builder.rb +153 -0
  81. data/lib/pubid/asme/components/code.rb +18 -0
  82. data/lib/pubid/asme/identifier.rb +61 -0
  83. data/lib/pubid/asme/identifiers/base.rb +70 -0
  84. data/lib/pubid/asme/identifiers/standard.rb +12 -0
  85. data/lib/pubid/asme/identifiers.rb +10 -0
  86. data/lib/pubid/asme/parser.rb +308 -0
  87. data/lib/pubid/asme/scheme.rb +37 -0
  88. data/lib/pubid/asme/single_identifier.rb +29 -0
  89. data/lib/pubid/asme/urn_generator.rb +133 -0
  90. data/lib/pubid/asme.rb +21 -0
  91. data/lib/pubid/astm/builder.rb +159 -0
  92. data/lib/pubid/astm/components/code.rb +33 -0
  93. data/lib/pubid/astm/identifier.rb +92 -0
  94. data/lib/pubid/astm/identifiers/adjunct.rb +21 -0
  95. data/lib/pubid/astm/identifiers/base.rb +13 -0
  96. data/lib/pubid/astm/identifiers/data_series.rb +25 -0
  97. data/lib/pubid/astm/identifiers/iso_dual_published.rb +74 -0
  98. data/lib/pubid/astm/identifiers/manual.rb +40 -0
  99. data/lib/pubid/astm/identifiers/monograph.rb +25 -0
  100. data/lib/pubid/astm/identifiers/research_report.rb +18 -0
  101. data/lib/pubid/astm/identifiers/standard.rb +52 -0
  102. data/lib/pubid/astm/identifiers/technical_report.rb +23 -0
  103. data/lib/pubid/astm/identifiers/work_in_progress.rb +21 -0
  104. data/lib/pubid/astm/parser.rb +244 -0
  105. data/lib/pubid/astm/scheme.rb +55 -0
  106. data/lib/pubid/astm/single_identifier.rb +25 -0
  107. data/lib/pubid/astm/urn_generator.rb +99 -0
  108. data/lib/pubid/astm.rb +38 -0
  109. data/lib/pubid/bsi/builder.rb +1483 -0
  110. data/lib/pubid/bsi/components/code.rb +11 -0
  111. data/lib/pubid/bsi/components/date.rb +11 -0
  112. data/lib/pubid/bsi/components/publisher.rb +11 -0
  113. data/lib/pubid/bsi/components/type.rb +11 -0
  114. data/lib/pubid/bsi/identifier.rb +87 -0
  115. data/lib/pubid/bsi/identifiers/addendum_document.rb +64 -0
  116. data/lib/pubid/bsi/identifiers/adopted_european_norm.rb +95 -0
  117. data/lib/pubid/bsi/identifiers/adopted_international_standard.rb +82 -0
  118. data/lib/pubid/bsi/identifiers/aerospace_standard.rb +118 -0
  119. data/lib/pubid/bsi/identifiers/amendment.rb +40 -0
  120. data/lib/pubid/bsi/identifiers/base.rb +11 -0
  121. data/lib/pubid/bsi/identifiers/british_industrial_practice.rb +27 -0
  122. data/lib/pubid/bsi/identifiers/british_standard.rb +33 -0
  123. data/lib/pubid/bsi/identifiers/bundled_identifier.rb +114 -0
  124. data/lib/pubid/bsi/identifiers/committee_document.rb +51 -0
  125. data/lib/pubid/bsi/identifiers/consolidated_identifier.rb +152 -0
  126. data/lib/pubid/bsi/identifiers/corrigendum.rb +28 -0
  127. data/lib/pubid/bsi/identifiers/detailed_specification.rb +69 -0
  128. data/lib/pubid/bsi/identifiers/disc.rb +56 -0
  129. data/lib/pubid/bsi/identifiers/draft_document.rb +71 -0
  130. data/lib/pubid/bsi/identifiers/electronic_book.rb +52 -0
  131. data/lib/pubid/bsi/identifiers/expert_commentary.rb +47 -0
  132. data/lib/pubid/bsi/identifiers/explanatory_supplement.rb +82 -0
  133. data/lib/pubid/bsi/identifiers/flex.rb +61 -0
  134. data/lib/pubid/bsi/identifiers/handbook.rb +39 -0
  135. data/lib/pubid/bsi/identifiers/index.rb +62 -0
  136. data/lib/pubid/bsi/identifiers/method.rb +76 -0
  137. data/lib/pubid/bsi/identifiers/national_annex.rb +73 -0
  138. data/lib/pubid/bsi/identifiers/practice_guide.rb +27 -0
  139. data/lib/pubid/bsi/identifiers/publicly_available_specification.rb +79 -0
  140. data/lib/pubid/bsi/identifiers/published_document.rb +79 -0
  141. data/lib/pubid/bsi/identifiers/section.rb +62 -0
  142. data/lib/pubid/bsi/identifiers/set.rb +46 -0
  143. data/lib/pubid/bsi/identifiers/standalone_amendment.rb +40 -0
  144. data/lib/pubid/bsi/identifiers/supplement_document.rb +51 -0
  145. data/lib/pubid/bsi/identifiers/supplementary_index.rb +81 -0
  146. data/lib/pubid/bsi/identifiers/technical_specification.rb +79 -0
  147. data/lib/pubid/bsi/identifiers/test_method.rb +67 -0
  148. data/lib/pubid/bsi/identifiers/value_added_publication.rb +52 -0
  149. data/lib/pubid/bsi/identifiers.rb +52 -0
  150. data/lib/pubid/bsi/model.rb +196 -0
  151. data/lib/pubid/bsi/parser.rb +659 -0
  152. data/lib/pubid/bsi/scheme.rb +243 -0
  153. data/lib/pubid/bsi/single_identifier.rb +129 -0
  154. data/lib/pubid/bsi/urn_generator.rb +84 -0
  155. data/lib/pubid/bsi.rb +32 -0
  156. data/lib/pubid/builder/base.rb +138 -0
  157. data/lib/pubid/bundled_identifier.rb +126 -0
  158. data/lib/pubid/ccsds/builder.rb +56 -0
  159. data/lib/pubid/ccsds/identifier.rb +84 -0
  160. data/lib/pubid/ccsds/identifiers/base.rb +89 -0
  161. data/lib/pubid/ccsds/identifiers/base_BASE_88929.rb +70 -0
  162. data/lib/pubid/ccsds/identifiers/corrigendum.rb +39 -0
  163. data/lib/pubid/ccsds/identifiers.rb +10 -0
  164. data/lib/pubid/ccsds/parser.rb +71 -0
  165. data/lib/pubid/ccsds/scheme.rb +57 -0
  166. data/lib/pubid/ccsds/single_identifier.rb +77 -0
  167. data/lib/pubid/ccsds/supplement_identifier.rb +33 -0
  168. data/lib/pubid/ccsds/urn_generator.rb +115 -0
  169. data/lib/pubid/ccsds.rb +21 -0
  170. data/lib/pubid/cen_cenelec/builder.rb +330 -0
  171. data/lib/pubid/cen_cenelec/identifier.rb +52 -0
  172. data/lib/pubid/cen_cenelec/identifiers/adopted_european_norm.rb +40 -0
  173. data/lib/pubid/cen_cenelec/identifiers/amendment.rb +29 -0
  174. data/lib/pubid/cen_cenelec/identifiers/base.rb +75 -0
  175. data/lib/pubid/cen_cenelec/identifiers/cen_report.rb +28 -0
  176. data/lib/pubid/cen_cenelec/identifiers/cen_workshop_agreement.rb +27 -0
  177. data/lib/pubid/cen_cenelec/identifiers/cenelec_harmonization_document.rb +28 -0
  178. data/lib/pubid/cen_cenelec/identifiers/consolidated_identifier.rb +61 -0
  179. data/lib/pubid/cen_cenelec/identifiers/corrigendum.rb +35 -0
  180. data/lib/pubid/cen_cenelec/identifiers/european_norm.rb +41 -0
  181. data/lib/pubid/cen_cenelec/identifiers/european_prestandard.rb +37 -0
  182. data/lib/pubid/cen_cenelec/identifiers/european_specification.rb +28 -0
  183. data/lib/pubid/cen_cenelec/identifiers/fragment.rb +22 -0
  184. data/lib/pubid/cen_cenelec/identifiers/guide.rb +27 -0
  185. data/lib/pubid/cen_cenelec/identifiers/harmonization_document.rb +27 -0
  186. data/lib/pubid/cen_cenelec/identifiers/technical_report.rb +27 -0
  187. data/lib/pubid/cen_cenelec/identifiers/technical_specification.rb +35 -0
  188. data/lib/pubid/cen_cenelec/identifiers.rb +32 -0
  189. data/lib/pubid/cen_cenelec/parser.rb +144 -0
  190. data/lib/pubid/cen_cenelec/scheme.rb +164 -0
  191. data/lib/pubid/cen_cenelec/single_identifier.rb +130 -0
  192. data/lib/pubid/cen_cenelec/supplement_identifier.rb +48 -0
  193. data/lib/pubid/cen_cenelec/urn_generator.rb +129 -0
  194. data/lib/pubid/cen_cenelec.rb +21 -0
  195. data/lib/pubid/cie/builder.rb +399 -0
  196. data/lib/pubid/cie/components/code.rb +72 -0
  197. data/lib/pubid/cie/components/language.rb +58 -0
  198. data/lib/pubid/cie/identifier.rb +71 -0
  199. data/lib/pubid/cie/identifiers/bundle.rb +20 -0
  200. data/lib/pubid/cie/identifiers/conference.rb +32 -0
  201. data/lib/pubid/cie/identifiers/corrigendum.rb +40 -0
  202. data/lib/pubid/cie/identifiers/dual_published.rb +41 -0
  203. data/lib/pubid/cie/identifiers/identical.rb +64 -0
  204. data/lib/pubid/cie/identifiers/joint_published.rb +52 -0
  205. data/lib/pubid/cie/identifiers/standard.rb +58 -0
  206. data/lib/pubid/cie/identifiers/supplement.rb +45 -0
  207. data/lib/pubid/cie/identifiers/tutorial_bundle.rb +20 -0
  208. data/lib/pubid/cie/identifiers.rb +17 -0
  209. data/lib/pubid/cie/parser.rb +347 -0
  210. data/lib/pubid/cie/scheme.rb +64 -0
  211. data/lib/pubid/cie/single_identifier.rb +30 -0
  212. data/lib/pubid/cie/supplement_identifier.rb +26 -0
  213. data/lib/pubid/cie/urn_generator.rb +123 -0
  214. data/lib/pubid/cie.rb +28 -0
  215. data/lib/pubid/components/code.rb +33 -0
  216. data/lib/pubid/components/date.rb +49 -0
  217. data/lib/pubid/components/edition.rb +32 -0
  218. data/lib/pubid/components/factory.rb +50 -0
  219. data/lib/pubid/components/language.rb +37 -0
  220. data/lib/pubid/components/locality.rb +10 -0
  221. data/lib/pubid/components/publisher.rb +36 -0
  222. data/lib/pubid/components/stage.rb +54 -0
  223. data/lib/pubid/components/type.rb +58 -0
  224. data/lib/pubid/components/typed_stage.rb +59 -0
  225. data/lib/pubid/components.rb +16 -0
  226. data/lib/pubid/core/pattern_doc_generator.rb +272 -0
  227. data/lib/pubid/core/update_codes.rb +77 -0
  228. data/lib/pubid/core.rb +8 -0
  229. data/lib/pubid/csa/builder.rb +671 -0
  230. data/lib/pubid/csa/components/code.rb +9 -0
  231. data/lib/pubid/csa/components.rb +9 -0
  232. data/lib/pubid/csa/composite_identifier.rb +27 -0
  233. data/lib/pubid/csa/identifier.rb +513 -0
  234. data/lib/pubid/csa/identifiers/base.rb +133 -0
  235. data/lib/pubid/csa/identifiers/bundled.rb +125 -0
  236. data/lib/pubid/csa/identifiers/canadian_adopted.rb +82 -0
  237. data/lib/pubid/csa/identifiers/cec.rb +129 -0
  238. data/lib/pubid/csa/identifiers/combined.rb +130 -0
  239. data/lib/pubid/csa/identifiers/csa_adopted.rb +78 -0
  240. data/lib/pubid/csa/identifiers/package.rb +65 -0
  241. data/lib/pubid/csa/identifiers/series.rb +127 -0
  242. data/lib/pubid/csa/identifiers/standard.rb +10 -0
  243. data/lib/pubid/csa/identifiers.rb +17 -0
  244. data/lib/pubid/csa/parser.rb +445 -0
  245. data/lib/pubid/csa/scheme.rb +44 -0
  246. data/lib/pubid/csa/single_identifier.rb +30 -0
  247. data/lib/pubid/csa/urn_generator.rb +80 -0
  248. data/lib/pubid/csa/wrapper_identifier.rb +31 -0
  249. data/lib/pubid/csa.rb +25 -0
  250. data/lib/pubid/etsi/builder.rb +133 -0
  251. data/lib/pubid/etsi/components/code.rb +42 -0
  252. data/lib/pubid/etsi/components/version.rb +37 -0
  253. data/lib/pubid/etsi/components.rb +10 -0
  254. data/lib/pubid/etsi/identifier.rb +57 -0
  255. data/lib/pubid/etsi/identifiers/amendment.rb +15 -0
  256. data/lib/pubid/etsi/identifiers/base.rb +38 -0
  257. data/lib/pubid/etsi/identifiers/corrigendum.rb +15 -0
  258. data/lib/pubid/etsi/identifiers/etsi_standard.rb +19 -0
  259. data/lib/pubid/etsi/identifiers/supplement_identifier.rb +91 -0
  260. data/lib/pubid/etsi/identifiers.rb +14 -0
  261. data/lib/pubid/etsi/parser.rb +133 -0
  262. data/lib/pubid/etsi/scheme.rb +42 -0
  263. data/lib/pubid/etsi/urn_generator.rb +76 -0
  264. data/lib/pubid/etsi.rb +21 -0
  265. data/lib/pubid/export/auditor.rb +89 -0
  266. data/lib/pubid/export/data_class_exporter.rb +59 -0
  267. data/lib/pubid/export/exporter.rb +74 -0
  268. data/lib/pubid/export/flavor_exporter.rb +402 -0
  269. data/lib/pubid/export/ieee_exporter.rb +78 -0
  270. data/lib/pubid/export/itu_exporter.rb +66 -0
  271. data/lib/pubid/export/nist_exporter.rb +64 -0
  272. data/lib/pubid/export/registry_exporter.rb +90 -0
  273. data/lib/pubid/export/result.rb +97 -0
  274. data/lib/pubid/export/scheme_exporter.rb +70 -0
  275. data/lib/pubid/export.rb +18 -0
  276. data/lib/pubid/format_detector.rb +16 -0
  277. data/lib/pubid/format_registry.rb +42 -0
  278. data/lib/pubid/identifier.rb +242 -0
  279. data/lib/pubid/identifier_metadata.rb +148 -0
  280. data/lib/pubid/identifier_registry.rb +198 -0
  281. data/lib/pubid/idf/builder.rb +82 -0
  282. data/lib/pubid/idf/identifier.rb +129 -0
  283. data/lib/pubid/idf/identifiers/amendment.rb +27 -0
  284. data/lib/pubid/idf/identifiers/corrigendum.rb +27 -0
  285. data/lib/pubid/idf/identifiers/international_standard.rb +123 -0
  286. data/lib/pubid/idf/identifiers/reviewed_method.rb +100 -0
  287. data/lib/pubid/idf/identifiers.rb +13 -0
  288. data/lib/pubid/idf/parser.rb +143 -0
  289. data/lib/pubid/idf/scheme.rb +61 -0
  290. data/lib/pubid/idf/single_identifier.rb +19 -0
  291. data/lib/pubid/idf/supplement_identifier.rb +43 -0
  292. data/lib/pubid/idf/urn_generator.rb +84 -0
  293. data/lib/pubid/idf.rb +25 -0
  294. data/lib/pubid/iec/builder.rb +458 -0
  295. data/lib/pubid/iec/components/code.rb +60 -0
  296. data/lib/pubid/iec/components/consolidated_amendment.rb +59 -0
  297. data/lib/pubid/iec/components/publisher.rb +36 -0
  298. data/lib/pubid/iec/components/sheet.rb +32 -0
  299. data/lib/pubid/iec/components/trf_info.rb +38 -0
  300. data/lib/pubid/iec/components/vap_suffix.rb +41 -0
  301. data/lib/pubid/iec/identifier.rb +256 -0
  302. data/lib/pubid/iec/identifiers/amendment.rb +94 -0
  303. data/lib/pubid/iec/identifiers/base.rb +82 -0
  304. data/lib/pubid/iec/identifiers/component_specification.rb +39 -0
  305. data/lib/pubid/iec/identifiers/conformity_assessment.rb +39 -0
  306. data/lib/pubid/iec/identifiers/consolidated_identifier.rb +82 -0
  307. data/lib/pubid/iec/identifiers/corrigendum.rb +94 -0
  308. data/lib/pubid/iec/identifiers/fragment_identifier.rb +137 -0
  309. data/lib/pubid/iec/identifiers/guide.rb +104 -0
  310. data/lib/pubid/iec/identifiers/international_standard.rb +147 -0
  311. data/lib/pubid/iec/identifiers/interpretation_sheet.rb +104 -0
  312. data/lib/pubid/iec/identifiers/operational_document.rb +39 -0
  313. data/lib/pubid/iec/identifiers/publicly_available_specification.rb +101 -0
  314. data/lib/pubid/iec/identifiers/sheet_identifier.rb +62 -0
  315. data/lib/pubid/iec/identifiers/societal_technology_trend_report.rb +40 -0
  316. data/lib/pubid/iec/identifiers/systems_reference_document.rb +40 -0
  317. data/lib/pubid/iec/identifiers/technical_report.rb +132 -0
  318. data/lib/pubid/iec/identifiers/technical_specification.rb +132 -0
  319. data/lib/pubid/iec/identifiers/technology_report.rb +39 -0
  320. data/lib/pubid/iec/identifiers/test_report_form.rb +78 -0
  321. data/lib/pubid/iec/identifiers/vap_identifier.rb +73 -0
  322. data/lib/pubid/iec/identifiers/white_paper.rb +39 -0
  323. data/lib/pubid/iec/identifiers/working_document.rb +96 -0
  324. data/lib/pubid/iec/parser.rb +417 -0
  325. data/lib/pubid/iec/rendering_style.rb +113 -0
  326. data/lib/pubid/iec/scheme.rb +71 -0
  327. data/lib/pubid/iec/single_identifier.rb +80 -0
  328. data/lib/pubid/iec/supplement_identifier.rb +161 -0
  329. data/lib/pubid/iec/urn_generator.rb +79 -0
  330. data/lib/pubid/iec/urn_parser.rb +90 -0
  331. data/lib/pubid/iec.rb +85 -0
  332. data/lib/pubid/ieee/aiee/builder.rb +71 -0
  333. data/lib/pubid/ieee/aiee/identifier.rb +105 -0
  334. data/lib/pubid/ieee/aiee/parser.rb +130 -0
  335. data/lib/pubid/ieee/aiee.rb +11 -0
  336. data/lib/pubid/ieee/builder.rb +1237 -0
  337. data/lib/pubid/ieee/components/code.rb +102 -0
  338. data/lib/pubid/ieee/components/draft.rb +93 -0
  339. data/lib/pubid/ieee/components/relationship.rb +157 -0
  340. data/lib/pubid/ieee/components/typed_stage.rb +100 -0
  341. data/lib/pubid/ieee/identifier.rb +54 -0
  342. data/lib/pubid/ieee/identifiers/adopted_standard.rb +33 -0
  343. data/lib/pubid/ieee/identifiers/base.rb +591 -0
  344. data/lib/pubid/ieee/identifiers/conformance_identifier.rb +35 -0
  345. data/lib/pubid/ieee/identifiers/corrigendum.rb +37 -0
  346. data/lib/pubid/ieee/identifiers/csa_dual_published.rb +51 -0
  347. data/lib/pubid/ieee/identifiers/dual_identifier.rb +18 -0
  348. data/lib/pubid/ieee/identifiers/dual_published.rb +28 -0
  349. data/lib/pubid/ieee/identifiers/iec_ieee_copublished.rb +27 -0
  350. data/lib/pubid/ieee/identifiers/interpretation_identifier.rb +34 -0
  351. data/lib/pubid/ieee/identifiers/joint_development.rb +172 -0
  352. data/lib/pubid/ieee/identifiers/multi_numbered_identifier.rb +51 -0
  353. data/lib/pubid/ieee/identifiers/nesc/base.rb +56 -0
  354. data/lib/pubid/ieee/identifiers/nesc/draft.rb +28 -0
  355. data/lib/pubid/ieee/identifiers/nesc/handbook.rb +32 -0
  356. data/lib/pubid/ieee/identifiers/nesc/redline.rb +26 -0
  357. data/lib/pubid/ieee/identifiers/nesc/standard.rb +26 -0
  358. data/lib/pubid/ieee/identifiers/nesc.rb +15 -0
  359. data/lib/pubid/ieee/identifiers/parenthetical_identifier.rb +20 -0
  360. data/lib/pubid/ieee/identifiers/project_draft_identifier.rb +26 -0
  361. data/lib/pubid/ieee/identifiers/redlined_standard.rb +33 -0
  362. data/lib/pubid/ieee/identifiers/si_standard.rb +73 -0
  363. data/lib/pubid/ieee/identifiers/standard.rb +41 -0
  364. data/lib/pubid/ieee/identifiers/supplement_identifier.rb +23 -0
  365. data/lib/pubid/ieee/identifiers.rb +33 -0
  366. data/lib/pubid/ieee/ire/builder.rb +61 -0
  367. data/lib/pubid/ieee/ire/identifier.rb +58 -0
  368. data/lib/pubid/ieee/ire/parser.rb +91 -0
  369. data/lib/pubid/ieee/ire.rb +11 -0
  370. data/lib/pubid/ieee/nesc/builder.rb +101 -0
  371. data/lib/pubid/ieee/nesc/parser.rb +154 -0
  372. data/lib/pubid/ieee/nesc.rb +10 -0
  373. data/lib/pubid/ieee/parser.rb +1226 -0
  374. data/lib/pubid/ieee/scheme.rb +90 -0
  375. data/lib/pubid/ieee/typed_stages.rb +172 -0
  376. data/lib/pubid/ieee/urn_generator.rb +188 -0
  377. data/lib/pubid/ieee.rb +32 -0
  378. data/lib/pubid/ieee_debug.rb +31 -0
  379. data/lib/pubid/iho/builder.rb +37 -0
  380. data/lib/pubid/iho/identifier.rb +61 -0
  381. data/lib/pubid/iho/identifiers/base.rb +41 -0
  382. data/lib/pubid/iho/identifiers/bibliographic.rb +16 -0
  383. data/lib/pubid/iho/identifiers/circular_letter.rb +15 -0
  384. data/lib/pubid/iho/identifiers/miscellaneous.rb +16 -0
  385. data/lib/pubid/iho/identifiers/publication.rb +15 -0
  386. data/lib/pubid/iho/identifiers/standard.rb +15 -0
  387. data/lib/pubid/iho/identifiers.rb +14 -0
  388. data/lib/pubid/iho/parser.rb +68 -0
  389. data/lib/pubid/iho/scheme.rb +29 -0
  390. data/lib/pubid/iho/urn_generator.rb +29 -0
  391. data/lib/pubid/iho.rb +21 -0
  392. data/lib/pubid/iso/builder.rb +309 -0
  393. data/lib/pubid/iso/bundled_identifier.rb +85 -0
  394. data/lib/pubid/iso/combined_identifier.rb +22 -0
  395. data/lib/pubid/iso/components/code.rb +36 -0
  396. data/lib/pubid/iso/components/publisher.rb +60 -0
  397. data/lib/pubid/iso/components.rb +12 -0
  398. data/lib/pubid/iso/format_resolver.rb +45 -0
  399. data/lib/pubid/iso/identifier.rb +330 -0
  400. data/lib/pubid/iso/identifiers/addendum.rb +104 -0
  401. data/lib/pubid/iso/identifiers/amendment.rb +128 -0
  402. data/lib/pubid/iso/identifiers/base.rb +115 -0
  403. data/lib/pubid/iso/identifiers/corrigendum.rb +108 -0
  404. data/lib/pubid/iso/identifiers/data.rb +76 -0
  405. data/lib/pubid/iso/identifiers/directives.rb +59 -0
  406. data/lib/pubid/iso/identifiers/directives_supplement.rb +119 -0
  407. data/lib/pubid/iso/identifiers/extract.rb +30 -0
  408. data/lib/pubid/iso/identifiers/guide.rb +100 -0
  409. data/lib/pubid/iso/identifiers/international_standard.rb +168 -0
  410. data/lib/pubid/iso/identifiers/international_standardized_profile.rb +94 -0
  411. data/lib/pubid/iso/identifiers/international_workshop_agreement.rb +89 -0
  412. data/lib/pubid/iso/identifiers/pas.rb +93 -0
  413. data/lib/pubid/iso/identifiers/recommendation.rb +45 -0
  414. data/lib/pubid/iso/identifiers/supplement.rb +87 -0
  415. data/lib/pubid/iso/identifiers/tc_document.rb +108 -0
  416. data/lib/pubid/iso/identifiers/technical_report.rb +103 -0
  417. data/lib/pubid/iso/identifiers/technical_specification.rb +102 -0
  418. data/lib/pubid/iso/identifiers/technology_trends_assessments.rb +95 -0
  419. data/lib/pubid/iso/identifiers.rb +33 -0
  420. data/lib/pubid/iso/parser.rb +512 -0
  421. data/lib/pubid/iso/rendering_style.rb +120 -0
  422. data/lib/pubid/iso/scheme.rb +193 -0
  423. data/lib/pubid/iso/single_identifier.rb +64 -0
  424. data/lib/pubid/iso/supplement_identifier.rb +27 -0
  425. data/lib/pubid/iso/urn_generator.rb +426 -0
  426. data/lib/pubid/iso/urn_parser.rb +437 -0
  427. data/lib/pubid/iso/utilities.rb +86 -0
  428. data/lib/pubid/iso.rb +50 -0
  429. data/lib/pubid/itu/builder.rb +171 -0
  430. data/lib/pubid/itu/components/code.rb +39 -0
  431. data/lib/pubid/itu/components/sector.rb +35 -0
  432. data/lib/pubid/itu/components/series.rb +29 -0
  433. data/lib/pubid/itu/i18n.rb +9 -0
  434. data/lib/pubid/itu/i18n.yaml +30 -0
  435. data/lib/pubid/itu/identifier.rb +118 -0
  436. data/lib/pubid/itu/identifiers/amendment.rb +43 -0
  437. data/lib/pubid/itu/identifiers/annex.rb +74 -0
  438. data/lib/pubid/itu/identifiers/base.rb +154 -0
  439. data/lib/pubid/itu/identifiers/combined_identifier.rb +47 -0
  440. data/lib/pubid/itu/identifiers/corrigendum.rb +44 -0
  441. data/lib/pubid/itu/identifiers/recommendation.rb +16 -0
  442. data/lib/pubid/itu/identifiers/special_publication.rb +31 -0
  443. data/lib/pubid/itu/identifiers/supplement.rb +46 -0
  444. data/lib/pubid/itu/identifiers.rb +16 -0
  445. data/lib/pubid/itu/model.rb +111 -0
  446. data/lib/pubid/itu/parser.rb +225 -0
  447. data/lib/pubid/itu/scheme.rb +174 -0
  448. data/lib/pubid/itu/urn_generator.rb +105 -0
  449. data/lib/pubid/itu.rb +22 -0
  450. data/lib/pubid/jcgm/builder.rb +88 -0
  451. data/lib/pubid/jcgm/components/publisher.rb +20 -0
  452. data/lib/pubid/jcgm/components.rb +9 -0
  453. data/lib/pubid/jcgm/identifier.rb +54 -0
  454. data/lib/pubid/jcgm/identifiers/amendment.rb +35 -0
  455. data/lib/pubid/jcgm/identifiers/guide.rb +21 -0
  456. data/lib/pubid/jcgm/identifiers/gum_guide.rb +51 -0
  457. data/lib/pubid/jcgm/identifiers.rb +11 -0
  458. data/lib/pubid/jcgm/parser.rb +84 -0
  459. data/lib/pubid/jcgm/scheme.rb +60 -0
  460. data/lib/pubid/jcgm/single_identifier.rb +48 -0
  461. data/lib/pubid/jcgm/supplement_identifier.rb +16 -0
  462. data/lib/pubid/jcgm/urn_generator.rb +110 -0
  463. data/lib/pubid/jcgm.rb +31 -0
  464. data/lib/pubid/jis/builder.rb +124 -0
  465. data/lib/pubid/jis/components/code.rb +59 -0
  466. data/lib/pubid/jis/components.rb +9 -0
  467. data/lib/pubid/jis/identifier.rb +61 -0
  468. data/lib/pubid/jis/identifiers/amendment.rb +16 -0
  469. data/lib/pubid/jis/identifiers/base.rb +72 -0
  470. data/lib/pubid/jis/identifiers/explanation.rb +22 -0
  471. data/lib/pubid/jis/identifiers/japanese_industrial_standard.rb +16 -0
  472. data/lib/pubid/jis/identifiers/standard.rb +27 -0
  473. data/lib/pubid/jis/identifiers/technical_report.rb +31 -0
  474. data/lib/pubid/jis/identifiers/technical_specification.rb +31 -0
  475. data/lib/pubid/jis/identifiers.rb +17 -0
  476. data/lib/pubid/jis/parser.rb +109 -0
  477. data/lib/pubid/jis/scheme.rb +49 -0
  478. data/lib/pubid/jis/single_identifier.rb +37 -0
  479. data/lib/pubid/jis/supplement_identifier.rb +47 -0
  480. data/lib/pubid/jis/urn_generator.rb +25 -0
  481. data/lib/pubid/jis.rb +23 -0
  482. data/lib/pubid/lutaml/no_store_registration.rb +30 -0
  483. data/lib/pubid/nist/builder.rb +2269 -0
  484. data/lib/pubid/nist/components/code.rb +38 -0
  485. data/lib/pubid/nist/components/edition.rb +134 -0
  486. data/lib/pubid/nist/components/issue_number.rb +28 -0
  487. data/lib/pubid/nist/components/part.rb +77 -0
  488. data/lib/pubid/nist/components/publisher.rb +24 -0
  489. data/lib/pubid/nist/components/stage.rb +53 -0
  490. data/lib/pubid/nist/components/supplement.rb +188 -0
  491. data/lib/pubid/nist/components/translation.rb +42 -0
  492. data/lib/pubid/nist/components/update.rb +103 -0
  493. data/lib/pubid/nist/components/version.rb +35 -0
  494. data/lib/pubid/nist/components/volume.rb +32 -0
  495. data/lib/pubid/nist/components.rb +19 -0
  496. data/lib/pubid/nist/configuration.rb +77 -0
  497. data/lib/pubid/nist/identifier.rb +62 -0
  498. data/lib/pubid/nist/identifiers/base.rb +578 -0
  499. data/lib/pubid/nist/identifiers/circular.rb +68 -0
  500. data/lib/pubid/nist/identifiers/circular_supplement.rb +50 -0
  501. data/lib/pubid/nist/identifiers/commercial_standard.rb +41 -0
  502. data/lib/pubid/nist/identifiers/commercial_standard_emergency.rb +56 -0
  503. data/lib/pubid/nist/identifiers/commercial_standards_monthly.rb +56 -0
  504. data/lib/pubid/nist/identifiers/crpl_report.rb +132 -0
  505. data/lib/pubid/nist/identifiers/federal_information_processing_standards.rb +104 -0
  506. data/lib/pubid/nist/identifiers/grant_contractor_report.rb +35 -0
  507. data/lib/pubid/nist/identifiers/handbook.rb +50 -0
  508. data/lib/pubid/nist/identifiers/internal_report.rb +56 -0
  509. data/lib/pubid/nist/identifiers/letter_circular.rb +45 -0
  510. data/lib/pubid/nist/identifiers/miscellaneous_publication.rb +65 -0
  511. data/lib/pubid/nist/identifiers/monograph.rb +69 -0
  512. data/lib/pubid/nist/identifiers/ncstar.rb +41 -0
  513. data/lib/pubid/nist/identifiers/nsrds.rb +41 -0
  514. data/lib/pubid/nist/identifiers/owmwp.rb +35 -0
  515. data/lib/pubid/nist/identifiers/report.rb +67 -0
  516. data/lib/pubid/nist/identifiers/special_publication.rb +36 -0
  517. data/lib/pubid/nist/identifiers/technical_note.rb +90 -0
  518. data/lib/pubid/nist/identifiers.rb +33 -0
  519. data/lib/pubid/nist/parser.rb +1117 -0
  520. data/lib/pubid/nist/scheme.rb +199 -0
  521. data/lib/pubid/nist/supplement_identifier.rb +67 -0
  522. data/lib/pubid/nist/urn_generator.rb +133 -0
  523. data/lib/pubid/nist.rb +37 -0
  524. data/lib/pubid/oiml/builder.rb +189 -0
  525. data/lib/pubid/oiml/components/code.rb +20 -0
  526. data/lib/pubid/oiml/components.rb +9 -0
  527. data/lib/pubid/oiml/identifier.rb +61 -0
  528. data/lib/pubid/oiml/identifiers/amendment.rb +13 -0
  529. data/lib/pubid/oiml/identifiers/annex.rb +62 -0
  530. data/lib/pubid/oiml/identifiers/base.rb +36 -0
  531. data/lib/pubid/oiml/identifiers/basic_publication.rb +13 -0
  532. data/lib/pubid/oiml/identifiers/document.rb +13 -0
  533. data/lib/pubid/oiml/identifiers/expert_report.rb +13 -0
  534. data/lib/pubid/oiml/identifiers/guide.rb +13 -0
  535. data/lib/pubid/oiml/identifiers/recommendation.rb +13 -0
  536. data/lib/pubid/oiml/identifiers/seminar_report.rb +13 -0
  537. data/lib/pubid/oiml/identifiers/vocabulary.rb +13 -0
  538. data/lib/pubid/oiml/identifiers.rb +18 -0
  539. data/lib/pubid/oiml/parser.rb +173 -0
  540. data/lib/pubid/oiml/scheme.rb +46 -0
  541. data/lib/pubid/oiml/single_identifier.rb +90 -0
  542. data/lib/pubid/oiml/supplement_identifier.rb +43 -0
  543. data/lib/pubid/oiml/urn_generator.rb +64 -0
  544. data/lib/pubid/oiml.rb +26 -0
  545. data/lib/pubid/parser/common_parse_methods.rb +13 -0
  546. data/lib/pubid/parser/common_parse_rules.rb +56 -0
  547. data/lib/pubid/parser.rb +8 -0
  548. data/lib/pubid/parsers/base.rb +11 -0
  549. data/lib/pubid/parsers/mr_string.rb +93 -0
  550. data/lib/pubid/plateau/builder.rb +50 -0
  551. data/lib/pubid/plateau/identifier.rb +57 -0
  552. data/lib/pubid/plateau/identifiers/annex.rb +16 -0
  553. data/lib/pubid/plateau/identifiers/base.rb +51 -0
  554. data/lib/pubid/plateau/identifiers/handbook.rb +34 -0
  555. data/lib/pubid/plateau/identifiers/technical_report.rb +20 -0
  556. data/lib/pubid/plateau/identifiers.rb +12 -0
  557. data/lib/pubid/plateau/parser.rb +63 -0
  558. data/lib/pubid/plateau/scheme.rb +45 -0
  559. data/lib/pubid/plateau/supplement_identifier.rb +72 -0
  560. data/lib/pubid/plateau/urn_generator.rb +29 -0
  561. data/lib/pubid/plateau.rb +26 -0
  562. data/lib/pubid/renderers/base.rb +53 -0
  563. data/lib/pubid/renderers/directives_renderer.rb +61 -0
  564. data/lib/pubid/renderers/guide_renderer.rb +24 -0
  565. data/lib/pubid/renderers/human_readable.rb +70 -0
  566. data/lib/pubid/renderers/iwa_renderer.rb +20 -0
  567. data/lib/pubid/renderers/mr_string.rb +16 -0
  568. data/lib/pubid/renderers/supplement_renderer.rb +36 -0
  569. data/lib/pubid/renderers/urn.rb +11 -0
  570. data/lib/pubid/renderers.rb +14 -0
  571. data/lib/pubid/rendering/base.rb +73 -0
  572. data/lib/pubid/rendering/common.rb +211 -0
  573. data/lib/pubid/rendering/context.rb +159 -0
  574. data/lib/pubid/rendering/date.rb +27 -0
  575. data/lib/pubid/rendering/format.rb +25 -0
  576. data/lib/pubid/rendering/language.rb +21 -0
  577. data/lib/pubid/rendering/numbering.rb +24 -0
  578. data/lib/pubid/rendering/publisher.rb +25 -0
  579. data/lib/pubid/rendering/stage.rb +38 -0
  580. data/lib/pubid/rendering/supplement.rb +46 -0
  581. data/lib/pubid/rendering.rb +16 -0
  582. data/lib/pubid/sae/builder.rb +32 -0
  583. data/lib/pubid/sae/components/code.rb +9 -0
  584. data/lib/pubid/sae/components/date.rb +19 -0
  585. data/lib/pubid/sae/components/type.rb +19 -0
  586. data/lib/pubid/sae/components.rb +11 -0
  587. data/lib/pubid/sae/identifier.rb +37 -0
  588. data/lib/pubid/sae/identifiers/base.rb +42 -0
  589. data/lib/pubid/sae/identifiers.rb +9 -0
  590. data/lib/pubid/sae/parser.rb +55 -0
  591. data/lib/pubid/sae/scheme.rb +47 -0
  592. data/lib/pubid/sae/urn_generator.rb +38 -0
  593. data/lib/pubid/sae.rb +19 -0
  594. data/lib/pubid/scheme.rb +219 -0
  595. data/lib/pubid/urn_generator/base.rb +110 -0
  596. data/lib/pubid/utils/string_normalizer.rb +196 -0
  597. data/lib/pubid/utils.rb +7 -0
  598. data/lib/pubid/version.rb +3 -1
  599. data/lib/pubid.rb +137 -13
  600. data/lib/tasks/docs.rake +37 -0
  601. data/lib/tasks/export.rake +38 -0
  602. data/lib/tasks/website-data.json +7488 -0
  603. metadata +616 -171
  604. data/lib/pubid/registry.rb +0 -30
@@ -0,0 +1,1226 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parslet"
4
+
5
+ module Pubid
6
+ module Ieee
7
+ # Parser class for IEEE identifiers
8
+ # Single Responsibility: Parsing IEEE identifier syntax
9
+ # Note: IEEE is extremely complex with many edge cases
10
+ class Parser < Parslet::Parser
11
+ # Basic building blocks
12
+ rule(:space) { str(" ") }
13
+ rule(:space?) { space.maybe }
14
+ rule(:dash) { str("-") }
15
+ rule(:dash?) { dash.maybe }
16
+ rule(:dot) { str(".") }
17
+ rule(:slash) { str("/") }
18
+ rule(:comma) { str(", ") }
19
+ rule(:digit) { match("[0-9]") }
20
+ rule(:digits) { digit.repeat(1) }
21
+ rule(:letter) { match("[A-Za-z]") }
22
+ rule(:upper) { match("[A-Z]") }
23
+ rule(:lower) { match("[a-z]") }
24
+
25
+ # Year pattern: exactly 4 digits starting with 19 or 20, optionally followed by up to two lowercase letters
26
+ # e.g. 2012, 2010a (a placeholder year such as "201x" has only three digits and is NOT matched)
27
+ rule(:year_digits) do
28
+ (str("19") | str("20")) >> digit.repeat(2,
29
+ 2) >> lower.repeat(0,
30
+ 2) >> digits.absent?
31
+ end
32
+
33
+ # Month patterns - numeric format (01-12)
34
+ rule(:month_numeric) do
35
+ (str("0") >> match("[1-9]")) | # 01-09
36
+ (str("1") >> match("[0-2]")) # 10-12
37
+ end
38
+
39
+ # Comprehensive date parsing
40
+ # Format 1: "September 2018" or "Sept 2018" (text month + year)
41
+ rule(:date_with_month_text) do
42
+ month_name.as(:month) >> space >> year_digits.as(:year)
43
+ end
44
+
45
+ # Format 2: "2018-09" (year-numeric month)
46
+ rule(:date_with_month_numeric) do
47
+ year_digits.as(:year) >> dash >> month_numeric.as(:month)
48
+ end
49
+
50
+ # Format 3: Just year "2018"
51
+ rule(:date_year_only) do
52
+ year_digits.as(:year)
53
+ end
54
+
55
+ # Combined date rule - longest match first
56
+ rule(:date_standalone) do
57
+ date_with_month_text | date_with_month_numeric | date_year_only
58
+ end
59
+
60
+ # Month patterns
61
+ rule(:month_name) do
62
+ # Period-suffixed abbreviations (longest first)
63
+ str("Sept.") | str("Oct.") | str("Nov.") | str("Dec.") |
64
+ str("Jan.") | str("Feb.") | str("Mar.") | str("Apr.") |
65
+ str("Jun.") | str("Jul.") | str("Aug.") |
66
+ # Full month names
67
+ str("January") | str("February") | str("March") | str("April") |
68
+ str("May") | str("June") | str("July") | str("August") |
69
+ str("September") | str("October") | str("November") | str("December") |
70
+ # Non-period abbreviations
71
+ str("Jan") | str("Feb") | str("Mar") | str("Apr") | str("Jun") |
72
+ str("Jul") | str("Aug") | str("Sep") | str("Sept") | str("Oct") | str("Nov") | str("Dec")
73
+ end
74
+
75
+ # Organizations
76
+ rule(:organization) do
77
+ str("IEEE") | str("AIEE") | str("ANSI") | str("ASA") |
78
+ str("IEC") | str("ISO") | str("ASTM") | str("CSA") | str("ASME") |
79
+ str("NACE") | str("NSF") | str("ASHRAE") | str("NCTA") | str("AESC") |
80
+ str("EIA") # NEW Session 224: Add EIA support
81
+ end
82
+
83
+ # Complex organization prefixes (Category 5: ANSI Complex)
84
+ rule(:complex_org_prefix) do
85
+ str("ANSI/IEEE-ANS") | str("ANSI/IEEE") | str("ANSI")
86
+ end
87
+
88
+ # Characteristic IEEE number patterns (without prefix)
89
+ # These patterns are distinctly IEEE even without "IEEE Std" prefix
90
+ rule(:characteristic_ieee_number) do
91
+ # C37.xxx series (power systems) - C followed by 2 digits, dot, more digits
92
+ (str("C") >> digit.repeat(2,
93
+ 2) >> dot >> digits >> match("[a-z]").repeat.maybe) |
94
+ # 802.xxx series (networking) - 802 followed by dot, digits, optional letter suffix
95
+ (str("802") >> dot >> digits >> match("[a-z]").repeat.maybe) |
96
+ # P followed by digits (draft projects)
97
+ (str("P") >> digits.repeat(1))
98
+ end
99
+
100
+ rule(:publisher) do
101
+ complex_org_prefix.as(:publisher) | organization.as(:publisher)
102
+ end
103
+
104
+ rule(:copublisher) do
105
+ # Three-way copublisher strings (treat as single unit, longest first)
106
+ str("/ISO/IEC").as(:copublisher) |
107
+ str("/IEC/ISO").as(:copublisher) |
108
+ # Two-way copublishers (original pattern)
109
+ (slash >> space? >> organization.as(:copublisher))
110
+ end
111
+
112
+ # Conformance document patterns (/Conformance01-2003, /Conformance02-2014)
113
+ # Allow optional space before slash for malformed inputs
114
+ rule(:conformance) do
115
+ (space? >> slash >> str("Conformance") >> match("[0-9]").repeat(1).as(:conf_number) >> dash >> year_digits.as(:conf_year)).as(:conformance)
116
+ end
117
+
118
+ # ASHRAE joint publication patterns (/ASHRAE Guideline 21-2012)
119
+ # Also handles /ASHRAE 21 without "Guideline"
120
+ rule(:ashrae_copub) do
121
+ (slash >> str("ASHRAE") >> space >>
122
+ (str("Guideline") >> space).maybe >>
123
+ digits.as(:ashrae_number) >>
124
+ (dash >> year_digits.as(:ashrae_year)).maybe).as(:ashrae_copub)
125
+ end
126
+
127
+ # IEEE cross-reference patterns (/C62.22.1-1996)
128
+ # References another IEEE standard from a specific series (e.g., C62, C37, C57)
129
+ rule(:ieee_crossref) do
130
+ (slash >> str("C") >> digits >> dot >> digits >> dot >> digits >> dash >> year_digits).as(:ieee_crossref)
131
+ end
132
+
133
+ # Document number - support letters and digits, with optional prefix P
134
+ # Complex multi-part numbers like P11073-10404-10419 should be fully captured
135
+ # But simple cases like "623-1976" should not consume the dash before year
136
+ rule(:number) do
137
+ (str("P").maybe >>
138
+ (digits | upper).repeat(1) >> # The first component must be at least one digit
139
+ # Only consume dash+digits if followed by another dash+digits (multi-part pattern)
140
+ # OR if the digits don't look like a year (not 4 digits starting with 19/20)
141
+ # This prevents consuming "623-1976" as a number but allows "P11073-10404-10419"
142
+ (dash >> digits >> year_digits.absent? >> (dash >> digits).repeat).maybe >>
143
+ lower.maybe).as(:number)
144
+ end
145
+
146
+ # Type - handle "No." and "No" (case-insensitive), longest first
147
+ rule(:type_word) do
148
+ str("Draft Std") | str("STD") | str("Standard") |
149
+ str("Std No.") | str("Std") | # Add "Std No." before "Std"
150
+ str("PTC") | # ASME Performance Test Code
151
+ (match("[Nn]") >> str("o.")) | (match("[Nn]") >> str("o")) |
152
+ str("No")
153
+ end
154
+
155
+ # Part and subpart - handle both dot and dash separators
156
+ rule(:part) do
157
+ (dot | dash) >> match("[0-9A-Za-z]").repeat(1).as(:part)
158
+ end
159
+
160
+ rule(:subpart) do
161
+ (dot | dash | str("_")) >>
162
+ ((str("REV") | str("Rev")).maybe >> match("[0-9a-z]").repeat(1) >>
163
+ (dot >> digits).maybe).as(:subpart)
164
+ end
165
+
166
+ # Year component - updated to use comprehensive date parsing
167
+ rule(:year) do
168
+ (dot | dash) >> date_standalone >> str("(E)").maybe
169
+ end
170
+
171
+ # Draft patterns
172
+ rule(:draft_status) do
173
+ (str("Active Unapproved") | str("Unapproved") | str("Approved")) >> space
174
+ end
175
+
176
+ rule(:draft_prefix) do
177
+ space? >> (str("/") | str("_") | dash | space)
178
+ end
179
+
180
+ rule(:draft_version) do
181
+ # Enhanced to handle multiple draft notation patterns
182
+ # D is optional to handle /08 style drafts (e.g., IEEE P1052/08)
183
+ (str("D") >> str("IS").absent?).maybe >> # Avoid matching "DIS" (ISO stage)
184
+ (
185
+ # Pattern: D3.1 (decimal with 1-2 digits on each side) - MOST COMMON, put first
186
+ # Also handles trailing letter: D7.3A, D2.0E
187
+ (match("[0-9]").repeat(1,
188
+ 2) >> dot >> match("[0-9]").repeat(1,
189
+ 2) >> lower.maybe) |
190
+ # Pattern: D.XX (decimal starting with dot) - e.g., D.19
191
+ (dot >> digits) |
192
+ # Pattern: DX+X (plus sign) - e.g., D1+1
193
+ (digits >> str("+") >> digits) |
194
+ # Pattern: DXXXXeYY or DXXXX.eYY (complex) - e.g., D2012.e27
195
+ (digits >> dot.maybe >> str("e") >> digits) |
196
+ # Pattern: D-X or DX or DX-d or DX_letter (original patterns)
197
+ # Handles: D12, D3.0, D043Rev18, suffixes like D15Sept
198
+ (str("-").maybe >> match("[0-9A-Za-z]").repeat(1) >> (str("-d") | (str("_") >> match("[0-9A-Za-z]").repeat(0))).maybe)
199
+ ).as(:draft_version)
200
+ end
201
+
202
+ rule(:draft_date) do
203
+ # Enhanced to handle: ", Sept 2008" or " Sept 2008" or ", Month Year"
204
+ ((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)) |
205
+ (((space? >> comma >> space?) | space) >> month_name.as(:month) >>
206
+ (
207
+ ((space >> digits.as(:day)).maybe >> comma >> year_digits.as(:year)) |
208
+ (comma >> space? >> year_digits.as(:year)) |
209
+ (space >> year_digits.as(:year))
210
+ ))
211
+ end
212
+
213
+ # FDIS and similar ISO stage codes without D prefix (Pattern 3)
214
+ # These appear after / in IEEE P identifiers but don't have the D prefix
215
+ # Examples: IEEE P15939/FDIS, IEEE P1234/CDV
216
+ rule(:fdraft) do
217
+ (slash >>
218
+ (str("FDIS") | str("CDV") | str("CD") | str("WD") | str("PWI") | str("NP")) >>
219
+ (
220
+ ((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)) | # Month Year
221
+ ((comma | space) >> year_digits.as(:year)) # Year only (e.g., /FDIS, 2016)
222
+ ).maybe >>
223
+ parenthetical.maybe).as(:fdraft)
224
+ end
225
+
226
+ rule(:draft) do
227
+ (draft_prefix >> draft_version.repeat(1, 2) >>
228
+ (dot >> digits.as(:revision)).maybe >>
229
+ draft_date.maybe).as(:draft)
230
+ end
231
+
232
+ # Edition - enhanced to support IEC formats like "Edition 1.0 2015-03"
233
+ rule(:edition) do
234
+ (comma >> year_digits.as(:year) >> str(" Edition")) |
235
+ ((space | dash) >> str("Edition ") >>
236
+ (digits >> dot >> digits).as(:edition) >>
237
+ (space | str(" - ")) >>
238
+ year_digits.as(:year) >>
239
+ (dash >> digit.repeat(2, 2).as(:edition_month)).maybe) # Capture -MM as edition_month
240
+ end
241
+
242
+ # Part/subpart/year combinations
243
+ rule(:part_subpart_year) do
244
+ (part >> subpart.repeat(1, 2) >> year) |
245
+ (part >> subpart >> year) |
246
+ (part >> year) |
247
+ (part >> subpart) |
248
+ year |
249
+ part
250
+ end
251
+
252
+ # Corrigendum
253
+ rule(:corrigendum) do
254
+ # Enhanced: Accept space as separator, make separators more flexible
255
+ # Also accept "Corrigendum" as alternative to "Cor"
256
+ ((str("_") | slash | dash | space) >>
257
+ (str("Corrigendum") | str("Cor")) >>
258
+ (dash | dot | space).maybe >> # More flexible separator after "Cor"
259
+ space? >> # Add space handling after separator
260
+ digits.as(:cor_number).maybe >>
261
+ ((dash | str(":") | space) >> year_digits.as(:cor_year)).maybe).as(:corrigendum)
262
+ end
263
+
264
+ # Amendment
265
+ rule(:amendment) do
266
+ (slash >> str("Amd") >> digits.as(:amd_number) >>
267
+ (dash >> year_digits.as(:amd_year)).maybe).as(:amendment)
268
+ end
269
+
270
+ # Interpretation notation (/INT)
271
+ # Enhanced to support optional year suffix: /INT-1991, /INT 1991
272
+ rule(:interpretation) do
273
+ (slash >> str("INT") >> ((dash | str(":") | space) >> year_digits.as(:int_year)).maybe).as(:interpretation)
274
+ end
275
+
276
+ # Reaffirmed - enhanced to support (R1992) format without space
277
+ rule(:reaffirmed) do
278
+ (
279
+ # Format: "Reaffirmed 1992"
280
+ (str("Reaffirmed ") >> year_digits.as(:year)) |
281
+ # Format: "(R1992)" - parentheses with R prefix (with or without space before)
282
+ (space.maybe >> str("(R") >> year_digits.as(:year) >> str(")"))
283
+ ).as(:reaffirmed)
284
+ end
285
+
286
+ # Redline
287
+ rule(:redline) do
288
+ str(" - Redline").as(:redline)
289
+ end
290
+
291
+ # Book nickname (e.g., "[The Orange Book]", "[IEEE Gold Book]")
292
+ rule(:book_nickname) do
293
+ space >> str("[") >> match("[^\\]]").repeat(1).as(:nickname) >> str("]")
294
+ end
295
+
296
+ # Relationship type keywords for Pattern 4 identifiers
297
+ rule(:relationship_revision_of) do
298
+ str("Revision of ") | str("Revison of ")
299
+ end
300
+ rule(:relationship_amendment_to) { str("Amendment to ") }
301
+ rule(:relationship_corrigendum_to) do
302
+ str("Corrigendum to ") | str("Corrigenda to ")
303
+ end
304
+ rule(:relationship_incorporates) do
305
+ str("incorporates ") | str("Incorporating ") | str("Incorporates ")
306
+ end
307
+ rule(:relationship_adoption_of) { str("Adoption of ") }
308
+ rule(:relationship_supplement_to) { str("Supplement to ") }
309
+ rule(:relationship_draft_amendment) do
310
+ str("Draft Amendment to ") | str("DRAFT Amendment to ")
311
+ end
312
+ rule(:relationship_draft_revision) { str("Draft Revision of ") }
313
+ rule(:relationship_reaffirmation) { str("Reaffirmation of ") }
314
+ rule(:relationship_redesignation) do
315
+ str("Redesignation of ") | str("redesignated as ")
316
+ end
317
+ rule(:relationship_supersedes) { str("Supersedes ") | str("Supercedes ") }
318
+ rule(:relationship_previously_designated) do
319
+ str("Previously designated as ")
320
+ end
321
+ rule(:relationship_includes) { str("Includes ") } # NEW Session 171
322
+
323
+ # Combined relationship type (longest match first)
324
+ rule(:relationship_type) do
325
+ relationship_draft_amendment.as(:draft_amendment_to) |
326
+ relationship_draft_revision.as(:draft_revision_of) |
327
+ relationship_previously_designated.as(:previously_designated_as) |
328
+ relationship_reaffirmation.as(:reaffirmation_of) |
329
+ relationship_redesignation.as(:redesignation_of) |
330
+ relationship_supersedes.as(:supersedes) |
331
+ relationship_includes.as(:includes) | # NEW Session 171
332
+ relationship_revision_of.as(:revision_of) |
333
+ relationship_amendment_to.as(:amendment_to) |
334
+ relationship_corrigendum_to.as(:corrigendum_to) |
335
+ relationship_incorporates.as(:incorporates) |
336
+ relationship_adoption_of.as(:adoption_of) |
337
+ relationship_supplement_to.as(:supplement_to)
338
+ end
339
+
340
+ # Identifier string (for parsing list of related identifiers)
341
+ # Captures text until delimiter: comma, closing paren, "and", " / ", "; ", "as amended by"
342
+ # Uses absent? to ensure we stop at these delimiters
343
+ rule(:identifier_string) do
344
+ (
345
+ str(", and ").absent? >>
346
+ str(" and ").absent? >>
347
+ str(", ").absent? >>
348
+ str(" as amended by ").absent? >>
349
+ str(" / ").absent? >>
350
+ str("; ").absent? >>
351
+ str(")").absent? >>
352
+ match(".")
353
+ ).repeat(1)
354
+ end
355
+
356
+ # Identifier list (comma and "and" separated)
357
+ rule(:identifier_list) do
358
+ identifier_string.as(:id) >>
359
+ (
360
+ (str(", and ") | str(" and ") | str(", ")) >>
361
+ identifier_string.as(:id)
362
+ ).repeat
363
+ end
364
+
365
+ # "as amended by" clause with identifier list
366
+ rule(:as_amended_by_clause) do
367
+ # Variant 1: "as amended by IEEE's X, Y, Z"
368
+ (str(" as amended by IEEE's ") >> identifier_list.as(:amendments)) |
369
+ # Variant 2: "as amended by X, Y, Z" (standard)
370
+ (str(" as amended by ") >> identifier_list.as(:amendments)) |
371
+ # Variant 3: "and its approved amendments" (no specific list)
372
+ str(" and its approved amendments").as(:approved_amendments)
373
+ end
374
+
375
+ # Relationship clause (handles all relationship types)
376
+ rule(:relationship_clause) do
377
+ space.maybe >> str("(") >>
378
+ relationship_type.as(:relationship_type) >>
379
+ identifier_list.as(:related_ids) >>
380
+ as_amended_by_clause.maybe >>
381
+ # Handle multiple relationships separated by " / " OR "; "
382
+ (
383
+ (str(" / ") | str("; ")) >> # Support both separators
384
+ relationship_type.as(:relationship_type) >>
385
+ identifier_list.as(:related_ids) >>
386
+ as_amended_by_clause.maybe
387
+ ).repeat.as(:additional_rels) >>
388
+ str(")")
389
+ end
390
+
391
+ # Title portion separated by colon (Category 8)
392
+ rule(:title_portion) do
393
+ str(":") >> space >> match('[^\n]').repeat(1).as(:title)
394
+ end
395
+
396
+ # Approved Draft suffix (Category 7)
397
+ rule(:approved_draft_suffix) do
398
+ (space >> str("- (Approved Draft)")) | (space >> str("(Approved Draft)"))
399
+ end
400
+
401
+ # Additional parameters (inside parentheses)
402
+ rule(:additional_parameters) do
403
+ (space.maybe >> str("(") >> # Make space before '(' optional
404
+ (reaffirmed |
405
+ # Handle "Revision of IEEE Std ..." with optional space after Std
406
+ (str("Revision of IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:revision_of)) |
407
+ # Handle typo "Revison of IEEE Std ..." with optional space after Std
408
+ (str("Revison of IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:revision_of)) |
409
+ # Handle "Revision to IEEE Std ..." with optional space after Std
410
+ (str("Revision to IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:revision_of)) |
411
+ # Handle "Revison to IEEE Std ..." with optional space after Std
412
+ (str("Revison to IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:revision_of)) |
413
+ # Amendment patterns (case-insensitive DRAFT)
414
+ ((str("DRAFT") | str("Draft") | str("draft")) >> str(" Amendment to ") >> match("[^)]").repeat(1).as(:draft_amendment_to)) |
415
+ (str("Amendment to IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:amendment_to)) |
416
+ # Adoption patterns
417
+ (str("Adoption of ") >> match("[^)]").repeat(1).as(:adoption)) |
418
+ # Other specific patterns
419
+ (str("Notebooks") >> space? >> match("[^,\\)]").repeat(1).as(:notebooks)) |
420
+ (str("Standard Newspaper(s)") >> space? >> match("[^,\\)]").repeat(1).as(:standard_newspapers)) |
421
+ # Catch-all for any other parenthetical content (MUST BE LAST)
422
+ match("[^)]").repeat(1).as(:parenthetical_content)
423
+ ) >>
424
+ str(")").maybe).as(:parameters)
425
+ end
426
+
427
+ # Parenthetical - try relationship_clause first, then fall back to additional_parameters
428
+ rule(:parenthetical) do
429
+ relationship_clause | additional_parameters
430
+ end
431
+
432
+ # IEC/IEEE copublished pattern - handle all variations comprehensively
433
+ # BUT exclude P prefix patterns (those are joint development)
434
+ rule(:iec_ieee_copublished) do
435
+ str("IEC/IEEE") >>
436
+ space >>
437
+ str("P").absent? >> # NOT a P prefix (would be joint development)
438
+ match("[^\n]").repeat(1).as(:content)
439
+ end
440
+
441
+ # Joint development patterns (ISO/IEC/IEEE in either IEEE or ISO format)
442
+ rule(:joint_development_ieee_format) do
443
+ # ISO/IEC/IEEE P26511/D8-2018 or ISO/IEEE P1003.1-2008 or IEC/IEEE P62582-1-2011
444
+ # ALSO handle: IEC/IEEE P60780-323, CDV1 2014 (comma before stage code)
445
+ # ALSO handle: IEEE/CSA P844.1/293.1/D2 (CSA dual numbering)
446
+ (str("ISO/IEC/IEEE") | str("ISO/IEEE") | str("IEC/IEEE") | str("IEEE/CSA")).as(:joint_publishers) >>
447
+ space >>
448
+ str("P") >> # P indicates IEEE-led
449
+ digits.as(:number) >>
450
+ ((dot | dash) >> digits.as(:part)).maybe >> # Optional part like .1 or -1
451
+ # CSA dual numbering: /293.1 (second number)
452
+ (slash >> digits >> (dot >> digits).maybe >> (dash >> digits.as(:draft_version)).maybe).maybe >>
453
+ (
454
+ # Variant 1: /D8 notation (original)
455
+ (slash >> str("D") >> digits.as(:draft_version)) |
456
+ # Variant 2: , CDV1 notation (comma before stage code)
457
+ (comma >> (str("CDV") | str("FDIS") | str("CD") | str("DIS")).as(:iec_stage) >> digits.maybe.as(:stage_iteration))
458
+ ).maybe >>
459
+ ((dash >> year_digits.as(:year)) | # Either -YEAR
460
+ (comma.maybe >> space >> month_name.as(:month) >> space.maybe >> year_digits.as(:year))).maybe # Or Month YEAR (with optional comma)
461
+ end
462
+
463
+ rule(:joint_development_iso_format) do
464
+ # ISO/IEC/IEEE FDIS 26511:2018 (ISO-led format)
465
+ (str("ISO/IEC/IEEE") | str("ISO/IEEE") | str("IEC/IEEE")).as(:joint_publishers) >>
466
+ space >>
467
+ # ISO stage codes
468
+ (str("FDIS") | str("DIS") | str("CD") | str("WD") | str("PWI") | str("NP")).as(:iso_stage) >>
469
+ space >>
470
+ digits.as(:number) >>
471
+ ((dot | dash) >> digits.as(:part)).maybe >> # Optional part
472
+ (str(":") >> year_digits.as(:year)).maybe
473
+ end
474
+
475
+ # Number-first pattern: "1873-2015 IEEE Standard..."
476
+ rule(:number_first_identifier) do
477
+ number >>
478
+ (dash >> year_digits.as(:year)).maybe >>
479
+ space >>
480
+ (publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >>
481
+ space >>
482
+ (type_word.as(:type) >> space?).maybe >>
483
+ match("[^\n]").repeat(0).as(:title)
484
+ end
485
+
486
+ # IEEE P pattern (without Std): "IEEE P1003.1..." OR just "P1003.1..." (prefix optional)
487
+ rule(:ieee_p_identifier) do
488
+ (str("IEEE").as(:publisher) >> space).maybe >> # Make IEEE prefix optional
489
+ str("P") >> space.maybe >> # Make space after P optional
490
+ number >>
491
+ (part_subpart_year | edition).maybe >>
492
+ # Pattern for /08 style drafts (digits without D prefix) - MUST come before corrigendum
493
+ (slash >> digits.as(:draft_version)).as(:digit_draft).maybe >>
494
+ # FDIS and other ISO stage codes without D prefix (Pattern 3)
495
+ fdraft.maybe >>
496
+ # Enhanced: Accept both comma and space before month/year
497
+ ((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
498
+ corrigendum.maybe >>
499
+ draft.maybe >>
500
+ # ALSO accept month/year after draft (some patterns like /DX, Month YEAR)
501
+ ((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
502
+ parenthetical.maybe
503
+ end
504
+
505
+ # ANSI P pattern: "ANSI PN42.34-D9a, 2015" OR "ANSI P1234/D5"
506
+ rule(:ansi_p_identifier) do
507
+ str("ANSI").as(:publisher) >> space >>
508
+ str("P") >> space.maybe >> # Make space after P optional
509
+ number >>
510
+ (part_subpart_year | edition).maybe >>
511
+ # Enhanced: Accept both comma and space before month/year
512
+ ((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
513
+ corrigendum.maybe >>
514
+ draft.maybe >>
515
+ # ALSO accept month/year after draft
516
+ ((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
517
+ # Accept bare year after draft: ", 2015"
518
+ ((comma | space) >> year_digits.as(:year)).maybe >>
519
+ parenthetical.maybe
520
+ end
521
+
522
+ # IEEE Draft P pattern: "IEEE Draft P802.11..." OR "Draft P802.11..." (IEEE prefix optional)
523
+ rule(:ieee_draft_p_identifier) do
524
+ (str("IEEE").as(:publisher) >> space).maybe >> # Make IEEE prefix optional
525
+ str("Draft") >> space >>
526
+ str("P") >>
527
+ number >>
528
+ (part_subpart_year | edition).maybe >>
529
+ # Enhanced: Accept month/year after draft number
530
+ (space >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
531
+ draft.maybe >>
532
+ parenthetical.maybe
533
+ end
534
+
535
+ # IEEE Approved Draft pattern: "IEEE Approved Draft Std P..."
536
+ rule(:ieee_approved_draft_identifier) do
537
+ str("IEEE").as(:publisher) >>
538
+ space >>
539
+ str("Approved") >> space >>
540
+ (str("Draft Std") | str("Std")).as(:type) >> space >>
541
+ str("P").maybe >>
542
+ number >>
543
+ (part_subpart_year | edition).maybe >>
544
+ draft.maybe >>
545
+ parenthetical.maybe
546
+ end
547
+
548
+ # Combined AIEE identifier pattern: "AIEE No 72-1932 and AIEE No 73-1932"
549
+ # Handles "and"-separated AIEE identifiers (from "Nos X and Y" preprocessing)
550
+ rule(:combined_aiee_identifier) do
551
+ # First AIEE identifier
552
+ Aiee::Parser.new.aiee_identifier.as(:first_aiee) >>
553
+ # "and" separator
554
+ space >> str("and") >> space >>
555
+ # Second AIEE identifier
556
+ Aiee::Parser.new.aiee_identifier.as(:second_aiee)
557
+ end
558
+
559
+ # AIEE (American Institute of Electrical Engineers) patterns
560
+ # Detect AIEE patterns and delegate to AIEE parser
561
+ rule(:aiee_identifier) do
562
+ # Lookahead for AIEE patterns - do not consume input
563
+ (
564
+ # IEEE-AIEE transitional pattern
565
+ (str("IEEE-AIEE") >> space >> (str("No.") | str("Nos") | str("No") | str("Standard") | str("Trans."))) |
566
+ # A.I.E.E. pattern (with dots, no spaces)
567
+ (str("A.I.E.E.") >> space >> (str("No.") | str("Nos") | str("No"))) |
568
+ # A. I. E. E. pattern (with dots and spaces)
569
+ (str("A. I. E. E.") >> space >> (str("No.") | str("Nos") | str("No") | str("Standard"))) |
570
+ # AIEE pattern - extended to include more type words
571
+ (str("AIEE") >> space >> (str("No.") | str("Nos") | str("No") | str("Standard") | str("Trans.") | str("Std")))
572
+ ).present? >>
573
+ # Delegate to AIEE parser if pattern detected
574
+ Aiee::Parser.new.aiee_identifier.as(:aiee)
575
+ end
576
+
577
+ # IRE (Institute of Radio Engineers) patterns
578
+ # Detect IRE patterns and delegate to IRE parser
579
+ rule(:ire_identifier) do
580
+ # Lookahead for IRE patterns - do not consume input
581
+ (
582
+ # Year-first pattern: "52 IRE 7.S2" or "60 IRE 28 PS7"
583
+ ((match("[1-6]") >> digit >> space >> str("IRE")) | # 2-digit year format
584
+ (str("19") >> digit.repeat(2, 2) >> space >> str("IRE"))) |
585
+ # IEEE-IRE transitional pattern
586
+ (str("IEEE-IRE") >> space)
587
+ ).present? >>
588
+ # Delegate to IRE parser if pattern detected
589
+ Ire::Parser.new.ire_identifier.as(:ire)
590
+ end
591
+
592
+ # NESC (National Electrical Safety Code) patterns
593
+ # Detect NESC patterns and delegate to NESC parser
594
+ rule(:nesc_identifier) do
595
+ # Lookahead for NESC patterns - do not consume input
596
+ (
597
+ # C2-YYYY pattern
598
+ (str("C2-") >> year_digits) |
599
+ # YYYY NESC pattern
600
+ (year_digits >> space >> (str("NESC") | str("National Electrical Safety Code"))) |
601
+ # Draft NESC pattern
602
+ (str("Draft") >> space >> (str("NESC") | str("National Electrical Safety Code"))) |
603
+ # Name-first pattern (NEW)
604
+ (str("National Electrical Safety Code") >> str(",") >> space >> str("C2-"))
605
+ ).present? >>
606
+ # Delegate to NESC parser if pattern detected
607
+ Nesc::Parser.new.nesc_identifier.as(:nesc)
608
+ end
609
+
610
+ # IEEE/ASTM SI/PSI (Système International) patterns
611
+ # SI = Published metric system standard
612
+ # PSI = Proposed SI (draft)
613
+ rule(:ieee_astm_si_psi) do
614
+ str("IEEE/ASTM").as(:publishers) >>
615
+ space >>
616
+ (str("PSI") | str("SI")).as(:si_type) >>
617
+ space >>
618
+ digits.as(:number) >>
619
+ # Draft notation for PSI (e.g., /D2, /D3)
620
+ (slash >> str("D") >> digits.as(:draft_version)).maybe >>
621
+ # Year with optional month
622
+ (
623
+ # Format: ", Month Year"
624
+ (comma >> month_name.as(:month) >> space >> year_digits.as(:year)) |
625
+ # Format: "-YEAR"
626
+ (dash >> year_digits.as(:year))
627
+ ).maybe >>
628
+ # Optional parenthetical (revision relationships)
629
+ parenthetical.maybe
630
+ end
631
+
632
+ # No-prefix IEEE identifier (characteristic patterns without "IEEE Std")
633
+ # These are patterns that are distinctly IEEE even without explicit publisher
634
+ rule(:no_prefix_ieee) do
635
+ characteristic_ieee_number.as(:number) >>
636
+ # Optional suffix (like -a, -b)
637
+ (dash >> match("[A-Za-z]")).maybe.as(:suffix) >>
638
+ # Optional year
639
+ (dash >> year_digits).maybe.as(:year) >>
640
+ # Optional draft notation
641
+ draft.maybe >>
642
+ # Optional language portion
643
+ (str("(E)") | str("(F)")).maybe >>
644
+ # Optional parenthetical content
645
+ parenthetical.maybe
646
+ end
647
+
648
+ # Corrigendum identifier with recursive base parsing
649
+ # Captures base identifier for recursive parsing, then corrigendum supplement
650
+ # Example: IEEE Std 535-2013/Cor. 1-2017
651
+ rule(:corrigendum_identifier) do
652
+ # Match a complete base identifier (reuse existing patterns)
653
+ # Try standard patterns that would match "IEEE Std 535-2013"
654
+ (
655
+ ((publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >> space).maybe >>
656
+ (type_word.as(:type) >> space?).maybe >>
657
+ number >>
658
+ part_subpart_year.maybe # This captures the full identifier before /Cor
659
+ ).as(:base_identifier) >>
660
+ # Now match the corrigendum portion
661
+ (slash | dash | space) >>
662
+ str("Cor") >>
663
+ (dash | dot | space).maybe >> # More flexible separator after "Cor"
664
+ space? >>
665
+ digits.as(:cor_number) >>
666
+ ((dash | str(":") | space) >> year_digits.as(:cor_year)).maybe >> # Optional cor year suffix
667
+ parenthetical.maybe
668
+ end
669
+
670
+ # Interpretation identifier with recursive base parsing
671
+ # Captures base identifier for recursive parsing, then interpretation supplement
672
+ # Example: IEEE Std 1076/INT-1991, IEEE Std 1003.1-1988/INT
673
+ rule(:interpretation_identifier) do
674
+ # Match a complete base identifier
675
+ (
676
+ ((publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >> space).maybe >>
677
+ (type_word.as(:type) >> space?).maybe >>
678
+ number >>
679
+ part_subpart_year.maybe
680
+ ).as(:base_identifier) >>
681
+ # Now match the interpretation portion
682
+ (slash | dash | space) >>
683
+ str("INT") >>
684
+ ((dash | str(":") | space) >> year_digits.as(:int_year)).maybe >> # Optional year suffix
685
+ parenthetical.maybe
686
+ end
687
+
688
+ # Conformance identifier with recursive base parsing
689
+ # Captures base identifier for recursive parsing, then conformance supplement
690
+ # Example: IEEE Std 802.16/Conformance01-2003
691
+ rule(:conformance_identifier) do
692
+ # Match a complete base identifier
693
+ (
694
+ ((publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >> space).maybe >>
695
+ (type_word.as(:type) >> space?).maybe >>
696
+ number >>
697
+ part_subpart_year.maybe
698
+ ).as(:base_identifier) >>
699
+ # Now match the conformance portion
700
+ (slash | dash | space) >>
701
+ str("Conformance") >>
702
+ match("[0-9]").repeat(1).as(:conf_number) >>
703
+ dash >>
704
+ year_digits.as(:conf_year) >>
705
+ parenthetical.maybe
706
+ end
707
+
708
+ # Multi-numbered identifier: same document with multiple numbers
709
+ # Examples: IEEE Std 1299/C62.22.1-1996, IEEE Std 960-1989, Std 1177-1989
710
+ rule(:multi_numbered_identifier) do
711
+ # Primary identifier (full IEEE identifier)
712
+ ((
713
+ (publisher >> space).maybe >>
714
+ (type_word.as(:type) >> space?).maybe >>
715
+ number >>
716
+ (part_subpart_year | edition).maybe
717
+ ).as(:primary_identifier) >>
718
+ # Separator: slash for cross-ref format, comma for joint standard
719
+ (slash >> str("C") >> digits >> dot >> digits >> dot >> digits >> dash >> year_digits).as(:secondary_crossref)) |
720
+ (comma >> space >> (type_word.as(:type) >> space?).maybe >> number >> dash >> year_digits).as(:secondary_joint)
721
+ end
722
+
723
+ # CSA dual published pattern: IEEE Std 844.1-2017/CSA C22.2 No. 293.1-17
724
+ rule(:csa_dual_published) do
725
+ # IEEE portion (full identifier)
726
+ (
727
+ publisher >> space >>
728
+ (type_word.as(:type) >> space?).maybe >>
729
+ number >>
730
+ (part_subpart_year | edition).maybe
731
+ ).as(:ieee_portion) >>
732
+ # CSA portion with slash separator
733
+ slash >>
734
+ str("CSA") >> space >>
735
+ # CSA number formats (various patterns observed)
736
+ (
737
+ # Format 1: C22.2 No. 293.1-17 (with NO.)
738
+ (str("C") >> digit.repeat(2) >> dot >> digit >> space >> str("No") >> dot >> space >>
739
+ match("[0-9.]").repeat(1) >> (dash | str(":")) >> digit.repeat(2)) |
740
+ # Format 2: C293.2-17 (without NO., dash year)
741
+ (str("C") >> match("[0-9.]").repeat(1) >> dash >> digit.repeat(2)) |
742
+ # Format 3: C22.2 No. 293.3:19 (with NO., colon year)
743
+ (str("C") >> digit.repeat(2) >> dot >> digit >> space >> str("No") >> dot >> space >>
744
+ match("[0-9.]").repeat(1) >> str(":") >> digit.repeat(2)) |
745
+ # Format 4: C293.4:19 (without NO., colon year)
746
+ (str("C") >> match("[0-9.]").repeat(1) >> str(":") >> digit.repeat(2))
747
+ ).as(:csa_portion)
748
+ end
749
+
750
+ # Basic IEEE identifier (no dual PubIDs or complex revisions yet)
751
rule(:identifier) do
  # Ordered choice: the first alternative that matches wins, so the
  # specialised patterns are listed before the generic one and the
  # no-prefix fallback comes last.
  #
  # `combined_aiee_identifier` used to be listed a second time after
  # `aiee_identifier`; an alternative that already failed at this position
  # cannot succeed on a retry, so the duplicate was removed.
  combined_aiee_identifier |
  aiee_identifier |
  ire_identifier |
  nesc_identifier |
  ieee_astm_si_psi |               # IEEE/ASTM SI/PSI support
  multi_numbered_identifier |      # multi-numbered identifiers before generic patterns
  csa_dual_published |             # CSA dual published before generic patterns
  corrigendum_identifier |         # corrigendum before generic patterns
  interpretation_identifier |      # interpretation before generic patterns
  conformance_identifier |         # conformance before generic patterns
  joint_development_ieee_format |
  joint_development_iso_format |
  iec_ieee_copublished |
  number_first_identifier |
  ieee_approved_draft_identifier |
  ieee_draft_p_identifier |
  ieee_p_identifier |
  ansi_p_identifier |              # ANSI P prefix support
  # Generic pattern: optional publisher(s), optional type, number, then a
  # chain of optional suffixes.
  (((publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >> space).maybe >> # publisher is optional
    draft_status.as(:draft_status).maybe >>
    (str("Draft Std").as(:type) >> space?).maybe >>
    (type_word.as(:type) >> (space >> str("No") >> space).maybe >> space?).maybe >>
    number >>
    (part_subpart_year | edition).maybe >>
    corrigendum.maybe >>
    amendment.maybe >>
    interpretation.maybe >>        # /INT support
    conformance.maybe >>           # /Conformance support
    ashrae_copub.maybe >>          # /ASHRAE Guideline support
    ieee_crossref.maybe >>         # /C62.22.1-1996 cross-reference support
    draft.maybe >>
    # Accept both comma and space before month/year
    ((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
    edition.maybe >>
    parenthetical.maybe >>         # single parenthetical only
    book_nickname.maybe >>         # book nickname support
    redline.maybe >>
    title_portion.maybe >>
    approved_draft_suffix.maybe) |
  no_prefix_ieee                   # no-prefix patterns last (lowest priority)
end
794
+
795
+ root(:identifier)
796
+
797
# Cleans up a raw identifier string and hands it to the grammar.
#
# The method applies a long, ORDER-SENSITIVE sequence of textual
# normalisations (typo fixes, spacing fixes, HTML entities, publisher
# ordering, dual-published formats, ...) and then calls `new.parse` on the
# result. Because every step sees the output of all previous steps, a later
# rule must match the text as it looks AFTER the earlier rewrites; four
# rules that violated this (AIEE "Nos", Fix 2O, Fix 2AD, Fix 2AE) have been
# corrected below and are marked with "ORDER FIX".
#
# @param string [String] raw identifier text, optionally ending in ".pdf"
# @return whatever `new.parse` returns for the cleaned string
def self.parse(string)
  # Strip .pdf extension if present (Pattern 3: File Extensions)
  cleaned = string.sub(/\.pdf$/i, "")

  # Note: IEC and ANSI identifiers are NOT filtered here because they can have
  # IEEE co-publication or adoption; per the original author, Base.parse decides
  # which standards are actually IEEE-related.

  # Pattern 3: Replace underscore before ISO stage codes with slash
  # (joint development drafts that use underscore instead of slash)
  cleaned = cleaned.gsub(/_(FDIS|CDV|CD|DIS|WD|PWI|NP)/, '/\1')

  # Collapse every whitespace run to a single space. After this point the
  # string contains no newlines, so ^ and $ below anchor to the whole string.
  cleaned = cleaned.gsub(/\s+/, " ")

  # Conservative data quality fixes: space before dash + 4-digit year, or
  # dash + space + 4-digit year. " - " (space-dash-space) is not touched here.
  cleaned = cleaned.gsub(/(\d)\s+-(\d{4})\b/, '\1-\2') # "C37.101 -2006" -> "C37.101-2006"
  cleaned = cleaned.gsub(/(\d)-\s+(\d{4})\b/, '\1-\2') # "C62.35- 2010" -> "C62.35-2010"

  # HTML entity for en dash (&#x2013;): convert to a hyphen without creating "--"
  cleaned = cleaned.gsub(/&#x2013;(?!-)/, "-") # en dash -> hyphen (if not followed by dash)
  cleaned = cleaned.gsub("&#x2013;-", "-")     # en dash + dash -> single dash

  # Remove wrong "!" prefix
  cleaned = cleaned.gsub(/^!IEEE /, "IEEE ")

  # Fix "IEEE/ ASTM" spacing (extra space after slash)
  cleaned = cleaned.gsub("IEEE/ ASTM", "IEEE/ASTM")

  # Remaining HTML entities
  cleaned = cleaned.gsub("&#x2122;", "™")   # trademark symbol (removed further down)
  cleaned = cleaned.gsub("&#x2019;", "'")   # smart apostrophe
  cleaned = cleaned.gsub("&amp;amp;", "&")  # double-encoded ampersand
  cleaned = cleaned.gsub("&amp;", "&")      # single-encoded ampersand

  # Wrap trailing P&V notation in parentheses:
  # "IEEE Std 500-1984 P&V" -> "IEEE Std 500-1984 (P&V)"
  cleaned = cleaned.gsub(/\s+(P&V)\s*$/, ' (\1)')

  # Fix a space inside a dotted number: "C57.1 2.25" -> "C57.12.25"
  cleaned = cleaned.gsub(/(\d+\.\d+)\s+(\d+\.)/, '\1\2')

  # Fix a space inside a 4-digit year: "1 996" -> "1996"
  cleaned = cleaned.gsub(/\b(1|2)\s+(\d{3})\b/, '\1\2')

  # Fix month+year run together: "March2016" -> "March 2016"
  cleaned = cleaned.gsub(
    /\b(January|February|March|April|May|June|July|August|September|October|November|December)(\d{4})\b/, '\1 \2'
  )
  # Same for abbreviated months
  cleaned = cleaned.gsub(
    /\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)(\d{4})\b/, '\1 \2'
  )

  # Convert space-separated IEC + IEEE pairs to semicolon format:
  # "IEC 62539 First Edition 2007-07 IEEE 930" -> "IEC 62539 First Edition 2007-07; IEEE 930"
  # NOTE(review): the edition group is optional and matches capital "Edition"
  # only, so "IEC 62539 IEEE 930" is converted while "... First edition ..."
  # is not - confirm this is intended.
  cleaned = cleaned.gsub(
    /(IEC\s+\d+(?:-\d+)?(?:\s+First?\s+Edition\s+\d{4}-\d{2})?)\s+(IEEE\s+\S+)/, '\1; \2'
  )

  # Remove literal trademark symbol: "C57.110™-2018" -> "C57.110-2018"
  cleaned = cleaned.gsub(/™/, "")

  # Fix specific year typo: "19969" -> "1969"
  cleaned = cleaned.gsub(/\b19969\b/, "1969")

  # Add the missing space after a comma between identifiers:
  # "802.3ch-2020,802.3ca-2020" -> "802.3ch-2020, 802.3ca-2020"
  cleaned = cleaned.gsub(/(\d{4}),(\d{3})/, '\1, \2')

  # Fix lowercase-L typos: "1003.1/2003.l/lNT" -> "1003.1/2003.1/INT"
  cleaned = cleaned.gsub(/\/lNT\b/, "/INT")
  cleaned = cleaned.gsub(".l/", ".1/")

  # Fix "I99O" (letters I and O instead of digits): "1076-CONC-I99O" -> "1076-CONC-1990"
  cleaned = cleaned.gsub(/\bI99O\b/, "1990")

  # Fix common publisher typos at the start of the string
  cleaned = cleaned.gsub(/^EEE /, "IEEE ")
  cleaned = cleaned.gsub(/^I EEE /, "IEEE ")  # space between I and EEE
  cleaned = cleaned.gsub(/^lEEE /, "IEEE ")   # lowercase L instead of I

  # Add a single missing closing parenthesis at the end (very conservative:
  # exactly one more "(" than ")" and the string does not already end in ")").
  # This intentionally runs BEFORE the trailing comma/colon cleanup below.
  open_count = cleaned.count("(")
  close_count = cleaned.count(")")
  if open_count == close_count + 1 && !cleaned.end_with?(")")
    cleaned = "#{cleaned})"
  end

  # Remove trailing commas/colons and text
  cleaned = cleaned.gsub(/,\s*Standard\s*$/, "")              # ", Standard" at end
  cleaned = cleaned.gsub(/[,:]\s*$/, "")                      # trailing comma/colon
  cleaned = cleaned.gsub(/,\s+and\s+IEEE\s+Std\s/, " and ")   # "IEEE Std and Std" case

  # Balance parentheses after the cleanup above
  open_count = cleaned.count("(")
  close_count = cleaned.count(")")

  if open_count > close_count
    # More opening than closing: append the missing closing parens
    missing = open_count - close_count
    cleaned = cleaned + (")" * missing)
  elsif close_count > open_count
    # More closing than opening: only remove trailing excess closing parens
    extra = close_count - open_count
    cleaned = cleaned.sub(/\){#{extra}}$/, "")
  end

  # === Simple normalizations ===

  # 1. Missing dash before year: "802.16g 2007" -> "802.16g-2007"
  #    Only digit + space + 4-digit year at end of string or before "("
  cleaned = cleaned.gsub(/(\d)\s+(\d{4})(?=\s*\(|\s*$)/, '\1-\2')

  # 2. Space-dash-space before year: "802.1ag - 2007" -> "802.1ag-2007"
  cleaned = cleaned.gsub(/\s+-\s+(\d{4})\b/, '-\1')

  # 3. Add missing "Std" after IEEE: "IEEE 1070-1995" -> "IEEE Std 1070-1995"
  cleaned = cleaned.gsub(/^IEEE\s+(?!Std\b)(\d)/, 'IEEE Std \1')

  # ORDER FIX (formerly "Fix 2AE" at the very end of this method):
  # "IEEE No 29-1941 / ASA C77.1-1943" -> "IEEE Std 29-1941 (ASA C77.1-1943)"
  # This has to run before step 3.5 rewrites "IEEE No" to "IEEE Std" and
  # before step 4 removes the space in front of the slash; in its old
  # position neither "IEEE No" nor " / " could still be present, so the
  # rule never fired.
  cleaned = cleaned.gsub(/^IEEE\s+No\s+(\d+-\d+)\s+\/\s+ASA\s+(.*)/,
                         'IEEE Std \1 (ASA \2)')

  # 3.5. Convert "IEEE No." to "IEEE Std": "IEEE No. 264-1968" -> "IEEE Std 264-1968"
  #      AIEE is deliberately not converted: AIEE keeps "No" as its format.
  cleaned = cleaned.gsub(/^IEEE\s+No\.\s*/, "IEEE Std ")
  cleaned = cleaned.gsub(/^IEEE\s+No\s/, "IEEE Std ")

  # 4. Space before slash: "262-1973 /ANSI" -> "262-1973/ANSI"
  #    From here on no slash is preceded by whitespace.
  cleaned = cleaned.gsub(/\s+\//, "/")

  # 5. Comma before Edition: ", 1998 Edition" -> "-1998"
  cleaned = cleaned.gsub(/,\s+(\d{4})\s+Edition/, '-\1')

  # 6. ISO/IEC spacing: "ISO/IEC15802" -> "ISO/IEC 15802"
  cleaned = cleaned.gsub(/(ISO\/IEC)(\d)/, '\1 \2')

  # === Publisher order ===

  # "IEEE Std ANSI/IEEE" -> "ANSI/IEEE Std"
  cleaned = cleaned.gsub(/^IEEE\s+Std\s+(ANSI\/IEEE)/, '\1 Std')

  # === Dual published formats ===

  # Semicolon to parenthetical:
  # "IEEE Std 120-1955; ASME PTC 19.6-1955" -> "IEEE Std 120-1955 (ASME PTC 19.6-1955)"
  # Only if a semicolon is followed by an organization abbreviation (capitals)
  if cleaned.match?(/;\s+[A-Z]{2,}/)
    cleaned = cleaned.sub(/;\s+([A-Z][^;]+)$/, ' (\1)')
  end

  # === Edition abbreviation / reaffirmation ===

  # ", 1999 Edn. (Reaff 2003)" -> "-1999 (R2003)"
  cleaned = cleaned.gsub(/,\s+(\d{4})\s+Edn\.\s+\(Reaff\s+(\d{4})\)/,
                         '-\1 (R\2)')
  # Same without the leading comma
  cleaned = cleaned.gsub(/(\d{4})\s+Edn\.\s+\(Reaff\s+(\d{4})\)/,
                         '\1 (R\2)')

  # "(Reaffirmed 1980, 56 IRE 28.S2)" -> "(R1980) (56 IRE 28.S2)"
  cleaned = cleaned.gsub(/\(Reaffirmed\s+(\d{4}),\s+(\d+\s+IRE[^)]+)\)/,
                         '(R\1) (\2)')

  # "number-year/ANSI identifier" -> "number-year (ANSI identifier)"
  # Only up to the next "(" or the end of the string
  cleaned = cleaned.gsub(%r{(\d{4})/ANSI\s+([^(]+)(?=\s*\(|$)},
                         '\1 (ANSI \2)')

  # "ISO/IEC TR11802" -> "ISO/IEC TR 11802"
  cleaned = cleaned.gsub(/(ISO\/IEC\s+TR)(\d)/, '\1 \2')

  # === AIEE dual numbers expansion ===

  # "AIEE Nos 72 and 73 - 1932" -> "AIEE No 72-1932 and AIEE No 73-1932"
  # ORDER FIX: step 2 above has already collapsed " - 1932" to "-1932", so
  # the whitespace around the dash must be optional here; with mandatory
  # whitespace this expansion could not match its own documented example.
  cleaned = cleaned.sub(/AIEE\s+Nos\s+(\d+)\s+and\s+(\d+)\s*-\s*(\d{4})/,
                        'AIEE No \1-\3 and AIEE No \2-\3')

  # === Typo fixes ===

  # "Stad" -> "Std"
  cleaned = cleaned.gsub(/\bStad\b/, "Std")

  # Lowercase "std" -> "Std" after IEEE/ANSI/AIEE
  cleaned = cleaned.gsub(/\b(IEEE|ANSI|AIEE)\s+std\b/, '\1 Std')

  # Strip "(TM)"
  cleaned = cleaned.gsub("(TM)", "")

  # "62704-4/D4, 2020" -> "IEEE P62704-4/D4, 2020"
  # Only if the string starts with digits, dash/dot, digits, "/D", digits
  if cleaned.match?(/^(\d+[-.]\d+)\/D\d+/)
    cleaned = "IEEE P#{cleaned}"
  end

  # Remove "/Preprint" (not a standard suffix)
  cleaned = cleaned.gsub(/\/Preprint\b/, "")

  # "Proposed Revision of" -> "Revision of"
  cleaned = cleaned.gsub("Proposed Revision of", "Revision of")

  # "ammended" typo -> "amended"
  cleaned = cleaned.gsub(/\bammended\b/i, "amended")

  # Remove a period directly after /INT or /Cor N-YYYY
  cleaned = cleaned.gsub(/(\/INT|\/Cor\s+\d+-\d{4})\./, '\1')

  # (No "/Conformance" spacing fix here: an earlier attempt broke valid
  # patterns such as "802.16/Conformance01-2003" and was removed.)

  # "1003.1/INT, March 1994 Edition" -> "1003.1/INT, March 1994"
  cleaned = cleaned.gsub(/(\/INT),\s+([A-Z][a-z]+)\s+(\d{4})\s+Edition/,
                         '\1, \2 \3')

  # "Dec. 1994 Ed." -> "Dec. 1994"
  cleaned = cleaned.gsub(/\s+Ed\.\s*$/, "")

  # === High-impact preprocessing for fixture failures ===

  # Remove period after "Std": "IEEE Std." -> "IEEE Std"
  cleaned = cleaned.gsub(/\bStd\.\s+/, "Std ")

  # Remove " - Redline" and everything after it
  cleaned = cleaned.gsub(/\s+-\s+Redline\b.*$/, "")

  # Remove title after year: "YYYY - IEEE Standard for..."
  cleaned = cleaned.gsub(
    /(\d{4})(\s+\([^)]+\))?\s+-\s+IEEE\s+Standard\s+for.*$/, '\1\2'
  )

  # Fix 2A: "IEEE PC37.20.9/D7.3A" -> "IEEE Std PC37.20.9/D7.3A" so it takes
  # the standard identifier path (the number rule would otherwise read
  # "PC37" as P + C37, per the original author).
  cleaned = cleaned.gsub(/^IEEE\s+PC(\d)/, 'IEEE Std PC\1')
  # Whitespace is already single-spaced at this point, so this only
  # re-emits the same text; kept for parity with the original.
  cleaned = cleaned.gsub(/^IEEE\s+Unapproved\s+Draft\s+Std\s+PC(\d)/,
                         'IEEE Unapproved Draft Std PC\1')

  # Fix 2B: only the "IEEE P<n> and ASHRAE" co-publication case needs help;
  # plain "IEEE P..." is left for the ieee_p_identifier rule.
  cleaned = cleaned.gsub(/^IEEE\s+P(\d+)\s+and\s+ASHRAE/,
                         'IEEE Std P\1 and ASHRAE')

  # Fix 2C: "ISO/IEC XXXX-YYYY: Title" -> strip the title after the colon
  cleaned = cleaned.gsub(/^(ISO\/IEC \d+[-.]\d+-\d{4}):.*$/, '\1')
  cleaned = cleaned.gsub(/^(ISO\/IEC \d+-\d{4}):.*$/, '\1')

  # Fix 2D: "ISO/IEC XXXX : YYYY" -> remove spaces around the colon
  cleaned = cleaned.gsub(/^(ISO\/IEC \d+[-.]\d*)\s*:\s*(\d{4})/, '\1:\2')
  cleaned = cleaned.gsub(/^(ISO\/IEC \d+)\s*:\s*(\d{4})/, '\1:\2')

  # Fix 2G: "IEC/IEEE PXXX_D5" -> underscore to slash
  cleaned = cleaned.gsub(/^(IEC\/IEEE P[\w.-]+)_D/, '\1/D')

  # Fix 2I: "IEEE/ISO/IEC PXXX" -> "ISO/IEC/IEEE PXXX"
  cleaned = cleaned.gsub(/^IEEE\/ISO\/IEC\s+(P[\w.-]+)/,
                         'ISO/IEC/IEEE \1')
  cleaned = cleaned.gsub(/^IEEE\/IEC\/ISO\s+(P[\w.-]+)/,
                         'IEC/ISO/IEEE \1')

  # Fix 2J: "IEEE/IEC PXXX D5" -> space to slash before draft / stage code
  cleaned = cleaned.gsub(/^(IEEE\/IEC P[\w.-]+)\s+D(\d)/, '\1/D\2')
  cleaned = cleaned.gsub(
    /^(IEEE\/IEC P[\w.-]+)\s+(CDV|FDIS|CD|DIS|ED\d)/, '\1/\2'
  )

  # Fix 2K: "ISO /IEC/IEEE" -> "ISO/IEC/IEEE". Step 4 above already removes
  # whitespace before slashes, so these two are a safety net only.
  cleaned = cleaned.gsub(/^ISO\s+\/IEC\/IEEE/, "ISO/IEC/IEEE")
  cleaned = cleaned.gsub(/^ISO\s+\/IEC/, "ISO/IEC")

  # Fix 2L: "IS0" typo (digit 0 instead of letter O)
  cleaned = cleaned.gsub(/^IS0\//, "ISO/")

  # Fix 2M: "IEEE-P15026-3-DIS-January 2015" -> "ISO/IEC/IEEE P15026-3/DIS, January 2015"
  cleaned = cleaned.gsub(/^IEEE-P(\d+)-(\d+)-DIS-(.*)/,
                         'ISO/IEC/IEEE P\1-\2/DIS, \3')

  # Fix 2N: "IEEE/CSA P844.1/293.1/D2" -> "IEEE/CSA P844.1/D2" (CSA number dropped)
  cleaned = cleaned.gsub(/^IEEE\/CSA\s+(P[\d.]+)\/([\d.]+)\/D(\d+)/,
                         'IEEE/CSA \1/D\3')

  # Fix 2O: "IEEE Approved Draft Std P" -> normalize spacing
  cleaned = cleaned.gsub(/^IEEE\s+Approved\s+Draft\s+Std\s+(P\d)/,
                         'IEEE Approved Draft Std \1')
  # "IEEE Approved Draft Std P1234 / D12" -> "IEEE Approved Draft Std P1234/D12"
  # ORDER FIX: step 4 has already turned " / D12" into "/ D12", so the
  # whitespace before the slash must be optional; requiring it left the
  # space after the slash in place.
  cleaned = cleaned.gsub(/^(IEEE Approved Draft Std P[\w.-]+)\s*\/\s*D/,
                         '\1/D')

  # Fix 2Q: AIEE format variations
  # "AIEE No.1C-1954" -> "AIEE No. 1C-1954" (add space after No.)
  cleaned = cleaned.gsub(/^AIEE\s+No\.\s*(\d)/, 'AIEE No. \1')
  # "AIEE no 700-1945" -> "AIEE No 700-1945" (capitalize)
  cleaned = cleaned.gsub(/^AIEE\s+no\s/, "AIEE No ")
  # "AIEE Std No. 800" -> "AIEE Standard No 800" (normalize type word)
  cleaned = cleaned.gsub(/^AIEE\s+Std\s+No\.\s*/, "AIEE Standard No ")

  # Fix 2R: "IEEE PSI 10/D2" -> "IEEE/ASTM PSI 10/D2"
  cleaned = cleaned.gsub(/^IEEE\s+PSI\s+(\d)/, 'IEEE/ASTM PSI \1')

  # Fix 2S: "IEEE/IEC P62271-111/PC37.60_D5" -> underscore to slash
  cleaned = cleaned.gsub(/^(IEEE\/IEC P[\d.-]+\/PC[\d.]+)_D/, '\1/D')

  # Fix 2T: "IEC P62271-111/IEEE PC37.60_D5" -> "IEC/IEEE PC37.60/D5"
  # (the IEC project number is not carried over)
  cleaned = cleaned.gsub(/^IEC\s+(P[\d.-]+)\/IEEE\s+(PC[\d.]+)_D/,
                         'IEC/IEEE \2/D')

  # Fix 2U: "IEC/IEC P" -> "IEC/IEEE P" (typo)
  cleaned = cleaned.gsub(/^IEC\/IEC\s+(P\d)/, 'IEC/IEEE \1')

  # Fix 2V: "NACE SPXXXX-YYYY/IEEE Std NNNN-YYYY" -> slash to parenthetical
  cleaned = cleaned.gsub(/^(NACE\s+SP\d+-\d+)\/(IEEE\s+Std\s+\d+-\d+)$/,
                         '\1 (\2)')

  # Fix 2W: expand any remaining "Edn." to "Edition"
  cleaned = cleaned.gsub("Edn.", "Edition")

  # Fix 2Y: "ASHARE" typo -> "ASHRAE"
  cleaned = cleaned.gsub("ASHARE", "ASHRAE")

  # Fix 2Z: leading "PC<digit>" -> "P<digit>"
  cleaned = cleaned.gsub(/^PC(\d)/, 'P\1')

  # Fix 2AA: "IEEE/ISO/IEC 8802-1Q-2020/Amd31-2021" -> "ISO/IEC/IEEE 8802-..."
  cleaned = cleaned.gsub(/^IEEE\/ISO\/IEC\s+(8802[\w.-]+)/,
                         'ISO/IEC/IEEE \1')

  # Fix 2AB: "IEEE C57.139/D14June 2010" -> "IEEE C57.139/D14, June 2010"
  # NOTE(review): step 3 inserts "Std" after "IEEE" for digit-leading
  # numbers, so the `C?`-absent branch can no longer match - confirm
  # whether "IEEE Std 1234/D14June 2010" should be handled too.
  cleaned = cleaned.gsub(
    /^(IEEE\s+C?\d[\d.]*\/D\d+)([A-Z][a-z]+\s+\d{4})/, '\1, \2'
  )

  # Fix 2AC: "ANSI/IEEE Std: Title" -> strip colon and title
  cleaned = cleaned.gsub(/^(ANSI\/IEEE Std):\s+.*$/, '\1')

  # Fix 2AD: "IEEE 1076 IEC 61691-1-1 First edition 2004-10" -> semicolon format
  # ORDER FIX: step 3 has already rewritten "IEEE 1076" to "IEEE Std 1076",
  # so an optional "Std" must be accepted between publisher and number.
  cleaned = cleaned.gsub(
    /^(IEEE\s+(?:Std\s+)?[\d.]+)\s+(IEC\s+\d+[-\d]*\s+.*edition\s+\d{4}-\d{2})$/i, '\1; \2'
  )

  new.parse(cleaned)
end
1224
+ end
1225
+ end
1226
+ end