pubid 2.0.0.pre.alpha.2 → 2.0.0.pre.alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +5 -1
- data/data/nist/update_codes.yaml +25 -0
- data/lib/pubid/amca/builder.rb +2 -2
- data/lib/pubid/amca/identifier.rb +7 -39
- data/lib/pubid/amca/identifiers/base.rb +0 -26
- data/lib/pubid/amca/identifiers/interpretation.rb +0 -17
- data/lib/pubid/amca/identifiers/publication.rb +0 -13
- data/lib/pubid/amca/renderer.rb +82 -0
- data/lib/pubid/amca/single_identifier.rb +0 -23
- data/lib/pubid/amca/urn_parser.rb +28 -0
- data/lib/pubid/amca.rb +42 -1
- data/lib/pubid/ansi/builder.rb +5 -3
- data/lib/pubid/ansi/identifier.rb +1 -43
- data/lib/pubid/ansi/identifiers/american_national_standard.rb +2 -1
- data/lib/pubid/ansi/identifiers/standard.rb +2 -3
- data/lib/pubid/ansi/renderer.rb +53 -0
- data/lib/pubid/ansi/single_identifier.rb +2 -31
- data/lib/pubid/ansi/urn_generator.rb +3 -38
- data/lib/pubid/ansi/urn_parser.rb +23 -0
- data/lib/pubid/ansi.rb +38 -3
- data/lib/pubid/api/builder.rb +29 -74
- data/lib/pubid/api/identifier.rb +0 -51
- data/lib/pubid/api/identifiers/base.rb +0 -2
- data/lib/pubid/api/identifiers/bulletin.rb +0 -2
- data/lib/pubid/api/identifiers/continuous_operations_standard.rb +0 -2
- data/lib/pubid/api/identifiers/mpms.rb +1 -17
- data/lib/pubid/api/identifiers/publication.rb +0 -2
- data/lib/pubid/api/identifiers/recommended_practice.rb +0 -2
- data/lib/pubid/api/identifiers/specification.rb +0 -2
- data/lib/pubid/api/identifiers/standard.rb +0 -2
- data/lib/pubid/api/identifiers/technical_report.rb +0 -2
- data/lib/pubid/api/identifiers/typeless_standard.rb +1 -14
- data/lib/pubid/api/identifiers.rb +18 -0
- data/lib/pubid/api/renderer.rb +89 -0
- data/lib/pubid/api/single_identifier.rb +1 -13
- data/lib/pubid/api/urn_generator.rb +0 -18
- data/lib/pubid/api/urn_parser.rb +35 -0
- data/lib/pubid/api.rb +51 -5
- data/lib/pubid/ashrae/builder.rb +3 -3
- data/lib/pubid/ashrae/identifier.rb +6 -39
- data/lib/pubid/ashrae/identifiers/addenda_package.rb +0 -10
- data/lib/pubid/ashrae/identifiers/addendum.rb +0 -19
- data/lib/pubid/ashrae/identifiers/base.rb +3 -0
- data/lib/pubid/ashrae/identifiers/combined_addenda.rb +0 -15
- data/lib/pubid/ashrae/identifiers/errata.rb +0 -10
- data/lib/pubid/ashrae/identifiers/interpretation.rb +0 -10
- data/lib/pubid/ashrae/renderer.rb +117 -0
- data/lib/pubid/ashrae/single_identifier.rb +0 -13
- data/lib/pubid/ashrae/urn_generator.rb +0 -8
- data/lib/pubid/ashrae/urn_parser.rb +27 -0
- data/lib/pubid/ashrae.rb +42 -1
- data/lib/pubid/asme/components/code.rb +10 -2
- data/lib/pubid/asme/identifier.rb +0 -46
- data/lib/pubid/asme/identifiers/base.rb +0 -60
- data/lib/pubid/asme/renderer.rb +66 -0
- data/lib/pubid/asme/urn_parser.rb +31 -0
- data/lib/pubid/asme.rb +42 -1
- data/lib/pubid/astm/components/code.rb +9 -0
- data/lib/pubid/{jis → astm}/components.rb +1 -1
- data/lib/pubid/astm/identifier.rb +0 -77
- data/lib/pubid/astm/identifiers/adjunct.rb +0 -8
- data/lib/pubid/astm/identifiers/data_series.rb +0 -14
- data/lib/pubid/astm/identifiers/iso_dual_published.rb +9 -34
- data/lib/pubid/astm/identifiers/manual.rb +0 -27
- data/lib/pubid/astm/identifiers/monograph.rb +0 -14
- data/lib/pubid/astm/identifiers/research_report.rb +0 -7
- data/lib/pubid/astm/identifiers/standard.rb +0 -39
- data/lib/pubid/astm/identifiers/technical_report.rb +0 -13
- data/lib/pubid/astm/identifiers/work_in_progress.rb +0 -11
- data/lib/pubid/astm/identifiers.rb +18 -0
- data/lib/pubid/astm/renderer.rb +172 -0
- data/lib/pubid/astm/single_identifier.rb +0 -10
- data/lib/pubid/astm/urn_parser.rb +30 -0
- data/lib/pubid/astm.rb +39 -27
- data/lib/pubid/bsi/builder.rb +21 -12
- data/lib/pubid/bsi/identifier.rb +8 -62
- data/lib/pubid/bsi/identifiers/addendum_document.rb +3 -33
- data/lib/pubid/bsi/identifiers/adopted_european_norm.rb +11 -47
- data/lib/pubid/bsi/identifiers/adopted_international_standard.rb +11 -38
- data/lib/pubid/bsi/identifiers/aerospace_standard.rb +3 -53
- data/lib/pubid/bsi/identifiers/amendment.rb +3 -19
- data/lib/pubid/bsi/identifiers/british_industrial_practice.rb +2 -4
- data/lib/pubid/bsi/identifiers/british_standard.rb +2 -1
- data/lib/pubid/bsi/identifiers/bundled_identifier.rb +3 -84
- data/lib/pubid/bsi/identifiers/committee_document.rb +1 -14
- data/lib/pubid/bsi/identifiers/consolidated_identifier.rb +3 -84
- data/lib/pubid/bsi/identifiers/corrigendum.rb +3 -7
- data/lib/pubid/bsi/identifiers/detailed_specification.rb +1 -34
- data/lib/pubid/bsi/identifiers/disc.rb +1 -27
- data/lib/pubid/bsi/identifiers/draft_document.rb +3 -44
- data/lib/pubid/bsi/identifiers/electronic_book.rb +3 -36
- data/lib/pubid/bsi/identifiers/expert_commentary.rb +3 -15
- data/lib/pubid/bsi/identifiers/explanatory_supplement.rb +1 -45
- data/lib/pubid/bsi/identifiers/flex.rb +1 -33
- data/lib/pubid/bsi/identifiers/handbook.rb +2 -13
- data/lib/pubid/bsi/identifiers/index.rb +1 -30
- data/lib/pubid/bsi/identifiers/method.rb +1 -39
- data/lib/pubid/bsi/identifiers/national_annex.rb +5 -27
- data/lib/pubid/bsi/identifiers/practice_guide.rb +2 -4
- data/lib/pubid/bsi/identifiers/publicly_available_specification.rb +3 -52
- data/lib/pubid/bsi/identifiers/published_document.rb +3 -52
- data/lib/pubid/bsi/identifiers/section.rb +1 -28
- data/lib/pubid/bsi/identifiers/set.rb +3 -17
- data/lib/pubid/bsi/identifiers/standalone_amendment.rb +1 -7
- data/lib/pubid/bsi/identifiers/supplement_document.rb +3 -21
- data/lib/pubid/bsi/identifiers/supplementary_index.rb +1 -44
- data/lib/pubid/bsi/identifiers/technical_specification.rb +3 -45
- data/lib/pubid/bsi/identifiers/test_method.rb +1 -30
- data/lib/pubid/bsi/identifiers/value_added_publication.rb +3 -14
- data/lib/pubid/bsi/identifiers.rb +0 -1
- data/lib/pubid/bsi/renderer.rb +1050 -0
- data/lib/pubid/bsi/single_identifier.rb +6 -70
- data/lib/pubid/bsi/urn_generator.rb +2 -3
- data/lib/pubid/bsi/urn_parser.rb +52 -0
- data/lib/pubid/bsi.rb +224 -1
- data/lib/pubid/builder/base.rb +57 -10
- data/lib/pubid/bundled_identifier.rb +0 -1
- data/lib/pubid/ccsds/builder.rb +4 -3
- data/lib/pubid/ccsds/identifier.rb +63 -66
- data/lib/pubid/ccsds/identifiers/base.rb +11 -61
- data/lib/pubid/ccsds/identifiers/corrigendum.rb +7 -6
- data/lib/pubid/ccsds/parser.rb +4 -2
- data/lib/pubid/ccsds/supplement_identifier.rb +15 -11
- data/lib/pubid/ccsds/urn_generator.rb +3 -3
- data/lib/pubid/ccsds/urn_parser.rb +20 -0
- data/lib/pubid/ccsds.rb +39 -1
- data/lib/pubid/cen_cenelec/builder.rb +12 -14
- data/lib/pubid/cen_cenelec/identifier.rb +7 -38
- data/lib/pubid/cen_cenelec/identifiers/adopted_european_norm.rb +13 -4
- data/lib/pubid/cen_cenelec/identifiers/amendment.rb +2 -8
- data/lib/pubid/cen_cenelec/identifiers/base.rb +5 -41
- data/lib/pubid/cen_cenelec/identifiers/cen_report.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/cen_workshop_agreement.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/consolidated_identifier.rb +2 -25
- data/lib/pubid/cen_cenelec/identifiers/corrigendum.rb +2 -13
- data/lib/pubid/cen_cenelec/identifiers/european_norm.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/european_prestandard.rb +4 -7
- data/lib/pubid/cen_cenelec/identifiers/european_specification.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/fragment.rb +2 -2
- data/lib/pubid/cen_cenelec/identifiers/harmonization_document.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/technical_report.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/technical_specification.rb +2 -1
- data/lib/pubid/cen_cenelec/renderer.rb +261 -0
- data/lib/pubid/cen_cenelec/single_identifier.rb +11 -89
- data/lib/pubid/cen_cenelec/urn_generator.rb +6 -6
- data/lib/pubid/cen_cenelec/urn_parser.rb +28 -0
- data/lib/pubid/cen_cenelec.rb +168 -1
- data/lib/pubid/cie/components/code.rb +8 -0
- data/lib/pubid/cie/identifier.rb +6 -57
- data/lib/pubid/cie/urn_parser.rb +28 -0
- data/lib/pubid/cie.rb +43 -1
- data/lib/pubid/components/adoption.rb +104 -0
- data/lib/pubid/components/code.rb +22 -8
- data/lib/pubid/components/date.rb +23 -16
- data/lib/pubid/components/edition.rb +9 -6
- data/lib/pubid/components/iteration.rb +32 -0
- data/lib/pubid/components/language.rb +6 -4
- data/lib/pubid/components/locality.rb +10 -1
- data/lib/pubid/components/publisher.rb +9 -6
- data/lib/pubid/components/relationship.rb +151 -0
- data/lib/pubid/components/stage.rb +5 -14
- data/lib/pubid/components/supplement.rb +184 -0
- data/lib/pubid/components/type.rb +5 -15
- data/lib/pubid/components/typed_stage.rb +10 -11
- data/lib/pubid/components.rb +4 -1
- data/lib/pubid/core/update_codes.rb +28 -7
- data/lib/pubid/csa/identifier.rb +0 -59
- data/lib/pubid/csa/identifiers/base.rb +2 -122
- data/lib/pubid/csa/identifiers/cec.rb +2 -101
- data/lib/pubid/csa/identifiers/series.rb +2 -102
- data/lib/pubid/csa/renderer.rb +292 -0
- data/lib/pubid/csa/urn_generator.rb +1 -1
- data/lib/pubid/csa/urn_parser.rb +33 -0
- data/lib/pubid/csa.rb +42 -1
- data/lib/pubid/etsi/components/code.rb +9 -2
- data/lib/pubid/etsi/identifier.rb +0 -43
- data/lib/pubid/etsi/identifiers/base.rb +1 -4
- data/lib/pubid/etsi/identifiers/supplement_identifier.rb +2 -9
- data/lib/pubid/etsi/renderer.rb +42 -0
- data/lib/pubid/etsi/urn_parser.rb +34 -0
- data/lib/pubid/etsi.rb +42 -1
- data/lib/pubid/export/exporter.rb +4 -46
- data/lib/pubid/export/flavor_exporter.rb +111 -278
- data/lib/pubid/export.rb +0 -6
- data/lib/pubid/identifier.rb +2 -17
- data/lib/pubid/identifier_facade.rb +114 -0
- data/lib/pubid/identifier_metadata.rb +1 -1
- data/lib/pubid/idf/builder.rb +3 -3
- data/lib/pubid/idf/identifier.rb +3 -66
- data/lib/pubid/idf/identifiers/amendment.rb +2 -1
- data/lib/pubid/idf/identifiers/corrigendum.rb +2 -1
- data/lib/pubid/idf/identifiers/international_standard.rb +2 -1
- data/lib/pubid/idf/identifiers/reviewed_method.rb +2 -1
- data/lib/pubid/idf/parser.rb +3 -2
- data/lib/pubid/idf/renderer.rb +84 -0
- data/lib/pubid/idf/supplement_identifier.rb +2 -10
- data/lib/pubid/idf/urn_generator.rb +4 -39
- data/lib/pubid/idf/urn_parser.rb +25 -0
- data/lib/pubid/idf.rb +51 -1
- data/lib/pubid/iec/builder.rb +46 -64
- data/lib/pubid/iec/components/code.rb +8 -32
- data/lib/pubid/iec/components/publisher.rb +0 -1
- data/lib/pubid/iec/components.rb +14 -0
- data/lib/pubid/iec/identifier.rb +251 -213
- data/lib/pubid/iec/identifiers/amendment.rb +2 -3
- data/lib/pubid/iec/identifiers/base.rb +8 -32
- data/lib/pubid/iec/identifiers/component_specification.rb +3 -3
- data/lib/pubid/iec/identifiers/conformity_assessment.rb +1 -2
- data/lib/pubid/iec/identifiers/consolidated_identifier.rb +27 -26
- data/lib/pubid/iec/identifiers/corrigendum.rb +2 -3
- data/lib/pubid/iec/identifiers/fragment_identifier.rb +37 -22
- data/lib/pubid/iec/identifiers/guide.rb +0 -2
- data/lib/pubid/iec/identifiers/international_standard.rb +2 -3
- data/lib/pubid/iec/identifiers/interpretation_sheet.rb +2 -3
- data/lib/pubid/iec/identifiers/operational_document.rb +3 -3
- data/lib/pubid/iec/identifiers/publicly_available_specification.rb +2 -3
- data/lib/pubid/iec/identifiers/sheet_identifier.rb +21 -11
- data/lib/pubid/iec/identifiers/societal_technology_trend_report.rb +3 -3
- data/lib/pubid/iec/identifiers/systems_reference_document.rb +2 -3
- data/lib/pubid/iec/identifiers/technical_report.rb +2 -3
- data/lib/pubid/iec/identifiers/technical_specification.rb +2 -3
- data/lib/pubid/iec/identifiers/technology_report.rb +1 -2
- data/lib/pubid/iec/identifiers/test_report_form.rb +5 -34
- data/lib/pubid/iec/identifiers/vap_identifier.rb +26 -19
- data/lib/pubid/iec/identifiers/white_paper.rb +3 -3
- data/lib/pubid/iec/identifiers/working_document.rb +4 -48
- data/lib/pubid/iec/identifiers.rb +30 -0
- data/lib/pubid/iec/parser.rb +13 -12
- data/lib/pubid/iec/renderer.rb +254 -0
- data/lib/pubid/iec/single_identifier.rb +6 -12
- data/lib/pubid/iec/supplement_identifier.rb +58 -54
- data/lib/pubid/iec/urn_generator.rb +3 -3
- data/lib/pubid/iec/urn_parser.rb +3 -3
- data/lib/pubid/iec.rb +40 -68
- data/lib/pubid/ieee/builder.rb +12 -12
- data/lib/pubid/ieee/components/code.rb +8 -0
- data/lib/pubid/ieee/components/draft.rb +14 -0
- data/lib/pubid/ieee/components/relationship.rb +5 -149
- data/lib/pubid/ieee/identifier.rb +6 -41
- data/lib/pubid/ieee/identifiers/adopted_standard.rb +1 -6
- data/lib/pubid/ieee/identifiers/base.rb +101 -458
- data/lib/pubid/ieee/identifiers/conformance_identifier.rb +1 -7
- data/lib/pubid/ieee/identifiers/corrigendum.rb +1 -9
- data/lib/pubid/ieee/identifiers/csa_dual_published.rb +1 -7
- data/lib/pubid/ieee/identifiers/dual_identifier.rb +1 -1
- data/lib/pubid/ieee/identifiers/dual_published.rb +1 -1
- data/lib/pubid/ieee/identifiers/iec_ieee_copublished.rb +1 -6
- data/lib/pubid/ieee/identifiers/interpretation_identifier.rb +1 -7
- data/lib/pubid/ieee/identifiers/joint_development.rb +2 -0
- data/lib/pubid/ieee/identifiers/multi_numbered_identifier.rb +1 -15
- data/lib/pubid/ieee/identifiers/parenthetical_identifier.rb +1 -3
- data/lib/pubid/ieee/identifiers/project_draft_identifier.rb +15 -0
- data/lib/pubid/ieee/identifiers/redlined_standard.rb +1 -4
- data/lib/pubid/ieee/identifiers/si_standard.rb +1 -35
- data/lib/pubid/ieee/identifiers/standard.rb +1 -1
- data/lib/pubid/ieee/pre_parser.rb +301 -0
- data/lib/pubid/ieee/renderer.rb +307 -0
- data/lib/pubid/ieee/urn_parser.rb +34 -0
- data/lib/pubid/ieee.rb +62 -1
- data/lib/pubid/ieee_debug.rb +0 -1
- data/lib/pubid/iho/builder.rb +2 -2
- data/lib/pubid/iho/identifier.rb +8 -42
- data/lib/pubid/iho/identifiers/base.rb +49 -10
- data/lib/pubid/iho/parser.rb +3 -3
- data/lib/pubid/iho/renderer.rb +30 -0
- data/lib/pubid/iho/urn_generator.rb +2 -2
- data/lib/pubid/iho/urn_parser.rb +58 -0
- data/lib/pubid/iho.rb +50 -1
- data/lib/pubid/iso/builder.rb +55 -53
- data/lib/pubid/iso/bundled_identifier.rb +51 -0
- data/lib/pubid/iso/components/code.rb +7 -19
- data/lib/pubid/iso/components/publisher.rb +10 -8
- data/lib/pubid/iso/components.rb +2 -4
- data/lib/pubid/iso/identifier.rb +218 -252
- data/lib/pubid/iso/identifiers/addendum.rb +9 -6
- data/lib/pubid/iso/identifiers/amendment.rb +8 -4
- data/lib/pubid/iso/identifiers/corrigendum.rb +4 -4
- data/lib/pubid/iso/identifiers/data.rb +0 -1
- data/lib/pubid/iso/identifiers/directives.rb +8 -2
- data/lib/pubid/iso/identifiers/directives_supplement.rb +43 -14
- data/lib/pubid/iso/identifiers/extract.rb +2 -2
- data/lib/pubid/iso/identifiers/guide.rb +0 -1
- data/lib/pubid/iso/identifiers/international_standard.rb +4 -4
- data/lib/pubid/iso/identifiers/international_standardized_profile.rb +4 -4
- data/lib/pubid/iso/identifiers/international_workshop_agreement.rb +10 -4
- data/lib/pubid/iso/identifiers/pas.rb +2 -2
- data/lib/pubid/iso/identifiers/recommendation.rb +2 -2
- data/lib/pubid/iso/identifiers/supplement.rb +11 -3
- data/lib/pubid/iso/identifiers/tc_document.rb +44 -15
- data/lib/pubid/iso/identifiers/technical_report.rb +4 -4
- data/lib/pubid/iso/identifiers/technical_specification.rb +2 -2
- data/lib/pubid/iso/identifiers/technology_trends_assessments.rb +2 -2
- data/lib/pubid/iso/identifiers.rb +0 -1
- data/lib/pubid/iso/normalizer.rb +89 -0
- data/lib/pubid/iso/parser.rb +22 -4
- data/lib/pubid/iso/supplement_identifier.rb +15 -2
- data/lib/pubid/iso/urn_generator.rb +66 -182
- data/lib/pubid/iso/urn_parser.rb +12 -7
- data/lib/pubid/iso.rb +173 -2
- data/lib/pubid/itu/builder.rb +0 -12
- data/lib/pubid/itu/components/code.rb +8 -0
- data/lib/pubid/itu/components.rb +11 -0
- data/lib/pubid/itu/identifier.rb +6 -104
- data/lib/pubid/itu/identifiers/amendment.rb +0 -2
- data/lib/pubid/itu/identifiers/annex.rb +0 -2
- data/lib/pubid/itu/identifiers/base.rb +0 -6
- data/lib/pubid/itu/identifiers/combined_identifier.rb +0 -2
- data/lib/pubid/itu/identifiers/corrigendum.rb +0 -2
- data/lib/pubid/itu/identifiers/recommendation.rb +0 -2
- data/lib/pubid/itu/identifiers/special_publication.rb +0 -2
- data/lib/pubid/itu/identifiers/supplement.rb +0 -2
- data/lib/pubid/itu/urn_parser.rb +23 -0
- data/lib/pubid/itu.rb +42 -1
- data/lib/pubid/jcgm/builder.rb +16 -8
- data/lib/pubid/jcgm/identifier.rb +0 -43
- data/lib/pubid/jcgm/identifiers/amendment.rb +2 -7
- data/lib/pubid/jcgm/identifiers/gum_guide.rb +2 -10
- data/lib/pubid/jcgm/renderer.rb +68 -0
- data/lib/pubid/jcgm/single_identifier.rb +1 -5
- data/lib/pubid/jcgm/urn_generator.rb +4 -6
- data/lib/pubid/jcgm/urn_parser.rb +23 -0
- data/lib/pubid/jcgm.rb +43 -2
- data/lib/pubid/jis/builder.rb +44 -52
- data/lib/pubid/jis/identifier.rb +132 -46
- data/lib/pubid/jis/identifiers/amendment.rb +1 -1
- data/lib/pubid/jis/identifiers/corrigendum.rb +16 -0
- data/lib/pubid/jis/identifiers/standard.rb +2 -1
- data/lib/pubid/jis/identifiers/technical_report.rb +2 -1
- data/lib/pubid/jis/identifiers/technical_specification.rb +2 -1
- data/lib/pubid/jis/identifiers.rb +1 -1
- data/lib/pubid/jis/parser.rb +31 -5
- data/lib/pubid/jis/renderer.rb +69 -0
- data/lib/pubid/jis/single_identifier.rb +6 -12
- data/lib/pubid/jis/supplement_identifier.rb +17 -14
- data/lib/pubid/jis/urn_parser.rb +23 -0
- data/lib/pubid/jis.rb +42 -2
- data/lib/pubid/nist/builder.rb +63 -1871
- data/lib/pubid/nist/caster.rb +1272 -0
- data/lib/pubid/nist/circular_supplement_builder.rb +291 -0
- data/lib/pubid/nist/components/code.rb +9 -20
- data/lib/pubid/nist/components/supplement.rb +2 -2
- data/lib/pubid/nist/components.rb +0 -1
- data/lib/pubid/nist/identifier.rb +11 -48
- data/lib/pubid/nist/identifiers/base.rb +110 -47
- data/lib/pubid/nist/identifiers/circular.rb +7 -2
- data/lib/pubid/nist/identifiers/circular_supplement.rb +2 -1
- data/lib/pubid/nist/identifiers/commercial_standard.rb +2 -1
- data/lib/pubid/nist/identifiers/commercial_standard_emergency.rb +6 -4
- data/lib/pubid/nist/identifiers/commercial_standards_monthly.rb +10 -3
- data/lib/pubid/nist/identifiers/crpl_report.rb +8 -8
- data/lib/pubid/nist/identifiers/dated_document.rb +49 -0
- data/lib/pubid/nist/identifiers/federal_information_processing_standards.rb +15 -24
- data/lib/pubid/nist/identifiers/grant_contractor_report.rb +2 -1
- data/lib/pubid/nist/identifiers/handbook.rb +2 -1
- data/lib/pubid/nist/identifiers/internal_report.rb +2 -1
- data/lib/pubid/nist/identifiers/letter_circular.rb +2 -1
- data/lib/pubid/nist/identifiers/miscellaneous_publication.rb +5 -4
- data/lib/pubid/nist/identifiers/monograph.rb +7 -3
- data/lib/pubid/nist/identifiers/report.rb +4 -2
- data/lib/pubid/nist/identifiers/special_publication.rb +2 -1
- data/lib/pubid/nist/identifiers/technical_note.rb +3 -2
- data/lib/pubid/nist/identifiers.rb +1 -0
- data/lib/pubid/nist/parser.rb +62 -452
- data/lib/pubid/nist/parser_output_normalizer.rb +233 -0
- data/lib/pubid/nist/preprocessor.rb +416 -0
- data/lib/pubid/nist/renderer.rb +43 -0
- data/lib/pubid/nist/router.rb +148 -0
- data/lib/pubid/nist/series/base.rb +58 -0
- data/lib/pubid/nist/series/crpl.rb +13 -0
- data/lib/pubid/nist/series/fips.rb +14 -0
- data/lib/pubid/nist/series/ir.rb +60 -0
- data/lib/pubid/nist/series/letter_preserving.rb +15 -0
- data/lib/pubid/nist/series/mono.rb +19 -0
- data/lib/pubid/nist/series/ncstar.rb +20 -0
- data/lib/pubid/nist/series.rb +49 -0
- data/lib/pubid/nist/supplement_identifier.rb +3 -1
- data/lib/pubid/nist/urn_parser.rb +67 -0
- data/lib/pubid/nist.rb +82 -4
- data/lib/pubid/oiml/components/code.rb +10 -0
- data/lib/pubid/oiml/identifier.rb +0 -50
- data/lib/pubid/oiml/identifiers/annex.rb +3 -45
- data/lib/pubid/oiml/identifiers/base.rb +2 -17
- data/lib/pubid/oiml/renderer.rb +161 -0
- data/lib/pubid/oiml/single_identifier.rb +6 -45
- data/lib/pubid/oiml/supplement_identifier.rb +4 -19
- data/lib/pubid/oiml/urn_generator.rb +0 -8
- data/lib/pubid/oiml/urn_parser.rb +22 -0
- data/lib/pubid/oiml.rb +42 -1
- data/lib/pubid/plateau/identifier.rb +7 -41
- data/lib/pubid/plateau/identifiers/handbook.rb +1 -3
- data/lib/pubid/plateau/identifiers/technical_report.rb +1 -1
- data/lib/pubid/plateau/renderer.rb +51 -0
- data/lib/pubid/plateau/supplement_identifier.rb +1 -1
- data/lib/pubid/plateau/urn_parser.rb +43 -0
- data/lib/pubid/plateau.rb +43 -1
- data/lib/pubid/renderers/directives_renderer.rb +22 -8
- data/lib/pubid/renderers/guide_renderer.rb +4 -2
- data/lib/pubid/renderers/human_readable.rb +18 -7
- data/lib/pubid/rendering/context.rb +28 -19
- data/lib/pubid/rendering.rb +0 -3
- data/lib/pubid/sae/components/date.rb +8 -0
- data/lib/pubid/sae/components/type.rb +5 -1
- data/lib/pubid/sae/identifier.rb +0 -23
- data/lib/pubid/sae/identifiers/base.rb +2 -16
- data/lib/pubid/sae/renderer.rb +36 -0
- data/lib/pubid/sae/urn_generator.rb +2 -10
- data/lib/pubid/sae/urn_parser.rb +36 -0
- data/lib/pubid/sae.rb +42 -1
- data/lib/pubid/urn_generator/base.rb +12 -12
- data/lib/pubid/urn_parser/base.rb +81 -0
- data/lib/pubid/urn_parser/errors.rb +9 -0
- data/lib/pubid/urn_parser.rb +14 -0
- data/lib/pubid/version.rb +1 -1
- data/lib/pubid.rb +29 -7
- data/lib/tasks/website-data.json +1940 -1882
- metadata +75 -44
- data/lib/pubid/amca/scheme.rb +0 -16
- data/lib/pubid/ansi/scheme.rb +0 -15
- data/lib/pubid/api/scheme.rb +0 -66
- data/lib/pubid/ashrae/scheme.rb +0 -53
- data/lib/pubid/asme/scheme.rb +0 -37
- data/lib/pubid/astm/scheme.rb +0 -55
- data/lib/pubid/bsi/identifiers/base.rb +0 -11
- data/lib/pubid/bsi/scheme.rb +0 -243
- data/lib/pubid/ccsds/scheme.rb +0 -57
- data/lib/pubid/cen_cenelec/scheme.rb +0 -164
- data/lib/pubid/cie/scheme.rb +0 -64
- data/lib/pubid/components/factory.rb +0 -50
- data/lib/pubid/csa/scheme.rb +0 -44
- data/lib/pubid/etsi/scheme.rb +0 -42
- data/lib/pubid/export/data_class_exporter.rb +0 -59
- data/lib/pubid/export/ieee_exporter.rb +0 -78
- data/lib/pubid/export/itu_exporter.rb +0 -66
- data/lib/pubid/export/nist_exporter.rb +0 -64
- data/lib/pubid/export/registry_exporter.rb +0 -90
- data/lib/pubid/export/scheme_exporter.rb +0 -70
- data/lib/pubid/identifier_registry.rb +0 -198
- data/lib/pubid/idf/scheme.rb +0 -61
- data/lib/pubid/iec/scheme.rb +0 -71
- data/lib/pubid/ieee/scheme.rb +0 -90
- data/lib/pubid/iho/scheme.rb +0 -29
- data/lib/pubid/iso/identifiers/base.rb +0 -115
- data/lib/pubid/iso/scheme.rb +0 -193
- data/lib/pubid/itu/scheme.rb +0 -174
- data/lib/pubid/jcgm/scheme.rb +0 -60
- data/lib/pubid/jis/components/code.rb +0 -59
- data/lib/pubid/jis/identifiers/base.rb +0 -72
- data/lib/pubid/jis/scheme.rb +0 -49
- data/lib/pubid/nist/components/publisher.rb +0 -24
- data/lib/pubid/nist/scheme.rb +0 -199
- data/lib/pubid/oiml/scheme.rb +0 -46
- data/lib/pubid/plateau/scheme.rb +0 -45
- data/lib/pubid/rendering/base.rb +0 -73
- data/lib/pubid/rendering/common.rb +0 -211
- data/lib/pubid/rendering/format.rb +0 -25
- data/lib/pubid/sae/scheme.rb +0 -47
- data/lib/pubid/scheme.rb +0 -219
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pubid
|
|
4
|
+
module Nist
|
|
5
|
+
# Normalizes the raw hash produced by the NIST parser before the Builder
|
|
6
|
+
# constructs the identifier object.
|
|
7
|
+
#
|
|
8
|
+
# The parser emits a flat hash with keys like :first_number, :second_number,
|
|
9
|
+
# :edition_dash_year, :update_prefix, etc. Many of those keys are
|
|
10
|
+
# *incompatible shapes* — e.g. parser captures a year as :edition_dash_year
|
|
11
|
+
# when it is actually a second_number, or a letter+digits suffix lives
|
|
12
|
+
# inside :first_number when it should become a Part component.
|
|
13
|
+
#
|
|
14
|
+
# Each `normalize_*` method here performs one such shape correction,
|
|
15
|
+
# mutating the hash in place. The Normalizer is intentionally
|
|
16
|
+
# *side-effect-only*: it never reads from the Builder, the Caster, or the
|
|
17
|
+
# identifier classes, so it can be tested in isolation.
|
|
18
|
+
#
|
|
19
|
+
# Pre-processing blocks that need to surface extracted components to the
|
|
20
|
+
# Builder (e.g. letter-suffix Part components, embedded-edition objects)
|
|
21
|
+
# remain in Builder#build because they create local variables that flow
|
|
22
|
+
# into the construction phase. All other normalizations live here.
|
|
23
|
+
class ParserOutputNormalizer
|
|
24
|
+
# Range of years we treat as "looks like a calendar year" when
|
|
25
|
+
# disambiguating :edition_dash_year from :second_number.
|
|
26
|
+
# NOTE(review): the upper bound is a hard-coded literal, so a dash-year above
# 2026 fails the VALID_YEAR_RANGE.cover? checks in this class and is not
# treated as a calendar year — confirm this is intended, not a stale cut-off.
VALID_YEAR_RANGE = (1901..2026).freeze
|
|
27
|
+
|
|
28
|
+
# Series that treat :edition_dash_year as a year-only edition when the
|
|
29
|
+
# dash year falls in VALID_YEAR_RANGE. For other series with a dash-year
|
|
30
|
+
# in this range, the dash-year is interpreted differently (or kept as
|
|
31
|
+
# a compound number, depending on the branch).
|
|
32
|
+
# Consulted by disambiguate_dash_year; RPT, GCR and IR are matched by their
# own branches there rather than through this list.
DASH_YEAR_AS_EDITION_SERIES = %w[HB CS FIPS].freeze
|
|
33
|
+
|
|
34
|
+
# Apply all normalizations to the parsed hash in the correct order.
|
|
35
|
+
# @param parsed_hash [Hash] parser output (mutated in place)
|
|
36
|
+
# @return [Hash] the same hash, normalized
|
|
37
|
+
def normalize(parsed_hash)
  # Step order is significant: the split_* steps may create :second_number
  # and :edition_dash_year, which the disambiguate_* steps then consume.
  parsed_hash.tap do |hash|
    merge_edition_e_into_update(hash)
    extract_embedded_edition_with_year(hash)
    extract_embedded_edition_without_dash_year(hash)
    split_second_number_edition_year(hash)
    split_fips_month_year_after_part(hash)
    disambiguate_ir_compound_vs_edition(hash)
    disambiguate_dash_year(hash)
  end
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
# Pattern: "800-53r4/Upd3-2015"
|
|
51
|
+
# Parser captures "-2015" as :edition_e but it belongs on :update.
|
|
52
|
+
# Moves the value captured under :edition_e onto :update as :update_year.
# Does nothing unless :update_prefix, :update and :edition_e are all present.
#
# @param parsed_hash [Hash] parser output (mutated in place)
# @return [Hash, nil] the removed :edition_e entry, or nil when nothing applied
def merge_edition_e_into_update(parsed_hash)
  required_keys = %i[update_prefix update edition_e]
  return unless required_keys.all? { |key| parsed_hash[key] }

  update_year = parsed_hash[:edition_e][:edition_id]
  # merge (not merge!) so the original :update hash object is left untouched.
  parsed_hash[:update] = parsed_hash[:update].merge(update_year: update_year)
  parsed_hash.delete(:edition_e)
end
|
|
59
|
+
|
|
60
|
+
# Pattern: "44e2-1955"
|
|
61
|
+
# first_number="44e2", edition_dash_year="1955"
|
|
62
|
+
# Result: first_number="44", edition(type:"e", id:"2", additional_text:"1955")
|
|
63
|
+
# Splits a "<digits><letter><digits>" :first_number into a base number and an
# Edition whose additional_text is the dash year, then removes
# :edition_dash_year. Applies only when :edition_dash_year is present.
#
# The pattern is anchored with \A/\z so the whole value must match; with
# ^/$ a multi-line value would match on one line and lose the rest.
#
# @param parsed_hash [Hash] parser output (mutated in place)
# @return [Object, nil] the removed :edition_dash_year entry, or nil when
#   the pattern did not apply
def extract_embedded_edition_with_year(parsed_hash)
  dash_year_entry = parsed_hash[:edition_dash_year]
  return unless dash_year_entry

  embedded = parsed_hash[:first_number].to_s.match(/\A(\d+)([a-zA-Z])(\d+)\z/)
  return unless embedded

  parsed_hash[:first_number] = Components::Code.new(value: embedded[1])
  parsed_hash[:edition_with_year] = Components::Edition.new(
    type: embedded[2].downcase, # edition letter is normalized to lower case
    id: embedded[3],
    additional_text: dash_year_entry[:dash_year],
  )
  parsed_hash.delete(:edition_dash_year)
end
|
|
80
|
+
|
|
81
|
+
# Pattern: "8115r1" (with no edition_dash_year)
|
|
82
|
+
# first_number="8115r1", no second_number
|
|
83
|
+
# Result: first_number="8115", edition(type:"r", id:"1")
|
|
84
|
+
#
|
|
85
|
+
# CRITICAL: Only when no :second_number is present, otherwise the
|
|
86
|
+
# compound-number logic in the Builder handles the pattern.
|
|
87
|
+
# Splits a "<digits><letter><digits>" :first_number into a base number and an
# Edition (stored under :edition_with_year) when neither :second_number nor
# :edition_dash_year is present.
#
# The pattern is anchored with \A/\z so the whole value must match; with
# ^/$ a multi-line value would match on one line and lose the rest.
#
# @param parsed_hash [Hash] parser output (mutated in place)
# @return [Object, nil] the Edition that was stored, or nil when the
#   pattern did not apply
def extract_embedded_edition_without_dash_year(parsed_hash)
  return if parsed_hash[:second_number] || parsed_hash[:edition_dash_year]

  embedded = parsed_hash[:first_number].to_s.match(/\A(\d+)([a-zA-Z])(\d+)\z/)
  return unless embedded

  parsed_hash[:first_number] = Components::Code.new(value: embedded[1])
  parsed_hash[:edition_with_year] = Components::Edition.new(
    type: embedded[2].downcase, # edition letter is normalized to lower case
    id: embedded[3],
  )
end
|
|
103
|
+
|
|
104
|
+
# Pattern: "105-1-1990"
|
|
105
|
+
# Parser returns second_number_edition_year={second_number:"1", dash_year:"1990"}
|
|
106
|
+
# Result: :second_number="1", plus either :edition_from_year (HB series)
|
|
107
|
+
# or :edition_dash_year (other series) for further downstream processing.
|
|
108
|
+
# Unpacks :second_number_edition_year into :second_number plus either
# :edition_from_year (series "HB" with an exactly-four-digit dash year) or
# :edition_dash_year (every other case), then removes the combined key.
#
# The four-digit check is anchored with \A/\z so a value carrying a trailing
# newline or extra lines is not mistaken for a year.
#
# @param parsed_hash [Hash] parser output (mutated in place)
# @return [Object, nil] the removed combined entry, or nil when absent
def split_second_number_edition_year(parsed_hash)
  combined = parsed_hash[:second_number_edition_year]
  return unless combined

  parsed_hash[:second_number] = combined[:second_number]
  dash_year = combined[:dash_year]

  handbook_year = safely_to_s(parsed_hash[:series]) == "HB" &&
                  dash_year.to_s.match?(/\A\d{4}\z/)
  if handbook_year
    parsed_hash[:edition_from_year] = Components::Edition.new(type: "e", id: dash_year)
  else
    # Re-wrapped in the same shape the parser uses for :edition_dash_year.
    parsed_hash[:edition_dash_year] = { dash_year: dash_year }
  end

  parsed_hash.delete(:second_number_edition_year)
end
|
|
124
|
+
|
|
125
|
+
# Pattern: "11-1-Sep1977"
|
|
126
|
+
# Parser returns fips_month_year_after_part={second_number:"1", edition_month:"Sep", edition_year:"1977"}
|
|
127
|
+
# Result: :second_number="1", :edition_from_year(type:"e", id:"197709")
|
|
128
|
+
# Unpacks :fips_month_year_after_part into :second_number and an
# :edition_from_year Edition, then removes the combined key. The edition id
# is the year followed by a zero-padded month when month_to_number yields a
# positive number, otherwise the year value as captured.
#
# @param parsed_hash [Hash] parser output (mutated in place)
# @return [Object, nil] the removed combined entry, or nil when absent
def split_fips_month_year_after_part(parsed_hash)
  part_and_date = parsed_hash[:fips_month_year_after_part]
  return unless part_and_date

  parsed_hash[:second_number] = part_and_date[:second_number]

  month_name = part_and_date[:edition_month]
  year = part_and_date[:edition_year]
  month = month_to_number(month_name)

  edition_id =
    if month&.positive?
      "#{year}#{format('%02d', month)}"
    else
      year
    end

  parsed_hash[:edition_from_year] = Components::Edition.new(type: "e", id: edition_id)
  parsed_hash.delete(:fips_month_year_after_part)
end
|
|
142
|
+
|
|
143
|
+
# Pattern: "84-2946" with series=IR
|
|
144
|
+
# For IR (Interagency Report), a 2-digit first_number followed by a
|
|
145
|
+
# 4-digit dash-year is *almost* always a year-only edition (e.g.
|
|
146
|
+
# "76e1100") — except when the 4-digit number is clearly not a year
|
|
147
|
+
# (outside VALID_YEAR_RANGE) or there is an embedded :edition_e (compound number).
|
|
148
|
+
# For series "IR" with an exactly-two-digit :first_number and an
# exactly-four-digit dash year, decides between two readings and removes
# :edition_dash_year:
#   - dash year inside VALID_YEAR_RANGE and no :edition_e present:
#     :first_number stays as is and :edition becomes a year-only Edition;
#   - otherwise: :first_number becomes the compound "<first>-<dash year>".
#
# Both digit checks are anchored with \A/\z so the whole value must match;
# with ^/$ a multi-line value would pass on a single line.
#
# @param parsed_hash [Hash] parser output (mutated in place)
# @return [Object, nil] the removed :edition_dash_year entry, or nil when
#   the pattern did not apply
def disambiguate_ir_compound_vs_edition(parsed_hash)
  return unless safely_to_s(parsed_hash[:series]) == "IR"
  return unless parsed_hash[:first_number] && parsed_hash[:edition_dash_year]

  first_num = parsed_hash[:first_number].to_s
  dash_year = parsed_hash[:edition_dash_year][:dash_year].to_s
  return unless first_num.match?(/\A\d{2}\z/) && dash_year.match?(/\A\d{4}\z/)

  year_only_edition = VALID_YEAR_RANGE.cover?(dash_year.to_i) && !parsed_hash[:edition_e]

  if year_only_edition
    parsed_hash[:first_number] = Components::Code.new(value: first_num)
    parsed_hash[:edition] = Components::Edition.new(type: "e", id: dash_year)
  else
    parsed_hash[:first_number] = Components::Code.new(value: "#{first_num}-#{dash_year}")
  end
  parsed_hash.delete(:edition_dash_year)
end
|
|
168
|
+
|
|
169
|
+
# Pattern: "250-1039" or "15-1000" or "1946-1947" (RPT)
|
|
170
|
+
# When the parser captures a :first_number plus :edition_dash_year, the
|
|
171
|
+
# dash year is interpreted differently per series:
|
|
172
|
+
# - RPT: always join into a compound (date range), regardless of year
|
|
173
|
+
# - GCR: always convert dash-year to year-only edition
|
|
174
|
+
# - IR: convert dash-year to year-only edition only if it is a valid year
|
|
175
|
+
# - HB/CS/FIPS: convert dash-year to year-only edition only if valid year
|
|
176
|
+
# - others: move the dash-year to :second_number if < 1900; otherwise leave :edition_dash_year in place
|
|
177
|
+
def disambiguate_dash_year(parsed_hash)
  number = parsed_hash[:first_number]
  dash_node = parsed_hash[:edition_dash_year]
  return unless number && dash_node
  # Numbers of the form "<digits><letter><digits>" are skipped entirely.
  return if number.to_s.match?(/^[0-9]+[a-zA-Z]\d+$/)

  dash_year = dash_node[:dash_year].to_s
  series = safely_to_s(parsed_hash[:series])
  numeric = dash_year.to_i
  plausible_year = VALID_YEAR_RANGE.cover?(numeric)

  case series
  when "RPT"
    # Report date ranges stay joined: "1946" + "1947" -> "1946-1947".
    parsed_hash[:first_number] = Components::Code.new(value: "#{number}-#{dash_year}")
    parsed_hash.delete(:edition_dash_year)
  when "GCR"
    # GCR treats the dash-year as an edition unconditionally.
    stash_edition_from_year(parsed_hash, dash_year)
  else
    if plausible_year && (series == "IR" || DASH_YEAR_AS_EDITION_SERIES.include?(series))
      # IR and the listed series convert only values inside VALID_YEAR_RANGE.
      stash_edition_from_year(parsed_hash, dash_year)
    elsif numeric < 1900
      # Too small to be a year: keep it as a second number.
      parsed_hash[:second_number] = dash_year
      parsed_hash.delete(:edition_dash_year)
    end
    # Anything else keeps :edition_dash_year so the Builder can decide.
  end
end
|
|
208
|
+
|
|
209
|
+
# Convert dash-year into a year-only Edition and stash for the Builder
|
|
210
|
+
# to assign after construction.
|
|
211
|
+
def stash_edition_from_year(parsed_hash, dash_year)
  # Build the year-only edition first, then swap it in for the raw capture.
  year_edition = Components::Edition.new(type: "e", id: dash_year)
  parsed_hash[:edition_from_year] = year_edition
  parsed_hash.delete(:edition_dash_year)
end
|
|
215
|
+
|
|
216
|
+
# Safely coerce arbitrary values to strings (parsers can hand us Parslet
|
|
217
|
+
# nodes that raise on #to_s in some scenarios).
|
|
218
|
+
def safely_to_s(value)
  text = value.to_s
rescue StandardError
  # A failing #to_s is treated as "no usable text".
  ""
else
  text
end
|
|
223
|
+
|
|
224
|
+
# Convert a month abbreviation or name to a 1-based month number.
|
|
225
|
+
# Returns nil when the value is not a recognizable month.
|
|
226
|
+
def month_to_number(month_str)
  # Date::ABBR_MONTHNAMES and Date::MONTHNAMES both hold nil at index 0, so a
  # nil argument would otherwise be "found" at position 0.
  return nil if month_str.nil?

  position = Date::ABBR_MONTHNAMES.index(month_str) ||
    Date::MONTHNAMES.index(month_str)
  return position if position

  # Numeric fallback ("5", "05"): only 1..12 counts as a month. Non-numeric
  # text converts to 0 and is reported as nil rather than as month 0.
  numeric = month_str.to_i
  numeric if numeric.between?(1, 12)
end
|
|
231
|
+
end
|
|
232
|
+
end
|
|
233
|
+
end
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pubid
|
|
4
|
+
module Nist
|
|
5
|
+
# Owns all regex-based normalization applied to NIST identifier strings
|
|
6
|
+
# before the Parslet grammar sees them.
|
|
7
|
+
#
|
|
8
|
+
# The Parser entry point delegates to Preprocessor#call; the grammar
|
|
9
|
+
# itself never inspects raw user input. Each private method below is a
|
|
10
|
+
# named stage of normalization, applied in the order declared in #call.
|
|
11
|
+
# Stages are kept in the historically validated sequence — reordering
|
|
12
|
+
# them risks regressions because later stages often match patterns
|
|
13
|
+
# produced by earlier ones.
|
|
14
|
+
#
|
|
15
|
+
# Format detection (:mr vs :short) is also owned here because it is a
|
|
16
|
+
# property of the original input, not of the parsed tree.
|
|
17
|
+
class Preprocessor
|
|
18
|
+
# Outcome of preprocessing.
|
|
19
|
+
# cleaned - the normalized identifier string ready for the grammar
|
|
20
|
+
# format - :mr if the input contains a dot and no whitespace, :short otherwise
|
|
21
|
+
Result = Struct.new(:cleaned, :format, keyword_init: true)
|
|
22
|
+
|
|
23
|
+
# Convert Roman numerals to Arabic numbers per NIST spec.
|
|
24
|
+
# Roman numerals I..X mapped to their Arabic value as a String, in ascending
# order ("I" => "1" ... "X" => "10").
ROMAN_TO_ARABIC = %w[I II III IV V VI VII VIII IX X]
  .each_with_index
  .map { |numeral, index| [numeral, (index + 1).to_s] }
  .to_h
  .freeze
|
|
36
|
+
|
|
37
|
+
def initialize(input)
  # Keep the trimmed original around: format detection inspects it later,
  # independently of whatever the normalization stages do to @cleaned.
  trimmed = input.to_s.strip
  @input = trimmed
  @cleaned = Core::UpdateCodes.apply(trimmed, :nist)
end
|
|
41
|
+
|
|
42
|
+
# Run every normalization stage and return a Result.
|
|
43
|
+
#
|
|
44
|
+
# Stage order is load-bearing — later stages match patterns produced
|
|
45
|
+
# by earlier ones. Reordering requires running the full NIST fixture
|
|
46
|
+
# suite to verify no regression.
|
|
47
|
+
def call
  run_stages
  # @cleaned is read only after every stage has run; the format is derived
  # from the untouched @input.
  detected = detected_format
  Result.new(cleaned: @cleaned, format: detected)
end
|
|
51
|
+
|
|
52
|
+
# Sequence of normalization stages in historically validated order.
|
|
53
|
+
# Extracted so rubocop can scope length/ABC metrics narrowly.
|
|
54
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
|
55
|
+
def run_stages
  # Listed in execution order. Each stage may depend on text shapes produced
  # by the stages before it, so the sequence must not be rearranged.
  stages = %i[
    normalize_publisher_and_series!
    normalize_lcirc_supplement_contexts!
    normalize_revision_spacing!
    normalize_letter_suffix_casing!
    normalize_draft_and_volume!
    convert_roman_volumes!
    normalize_supplement_and_part!
    normalize_version_notation!
    normalize_edition_year_suffix!
    normalize_revision_with_letter!
    normalize_version_dotted_spaces!
    normalize_update_markers!
    normalize_supplement_variants!
    normalize_revision_language!
    normalize_mr_translation_codes!
    convert_dashyear_to_edition!
    revert_dashyear_for_series!
    normalize_version_verbose!
    normalize_part_notation!
    normalize_series_specific_spacing!
    normalize_verbose_keywords!
  ]
  # send is needed because the stages are private; .last keeps the return
  # value equal to that of the final stage.
  stages.map { |stage| send(stage) }.last
end
|
|
78
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
|
79
|
+
|
|
80
|
+
# Detect input format: :mr (dot-separated machine-readable) or :short.
|
|
81
|
+
def detected_format
  # Machine-readable identifiers are dot-separated and contain no whitespace.
  return :short unless @input.include?(".")
  return :short if @input.match?(/\s/)

  :mr
end
|
|
84
|
+
|
|
85
|
+
private
|
|
86
|
+
|
|
87
|
+
# Lowercase publishers, publisher+series concatenations, lowercase
|
|
88
|
+
# series codes, and the lone "LC" → "LCIRC" expansion.
|
|
89
|
+
def normalize_publisher_and_series!
|
|
90
|
+
@cleaned = @cleaned.sub(/^nbs\b/i, "NBS")
|
|
91
|
+
@cleaned = @cleaned.sub(/^nist\b/i, "NIST")
|
|
92
|
+
@cleaned = @cleaned.gsub(
|
|
93
|
+
/^(NBS|NIST)(IR|FIPS|GCR|HB|MONO|MP|NCSTAR|NSRDS)/i, '\1 \2'
|
|
94
|
+
)
|
|
95
|
+
@cleaned = @cleaned.sub(/\b(ir|sp|tn|hb|fips|ams|vts)\b/i, &:upcase)
|
|
96
|
+
@cleaned = @cleaned.gsub(/\bLC\b(?!IRC)/, "LCIRC")
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# LCIRC series: combine with NBS prefix when a supplement marker
|
|
100
|
+
# follows, and convert MR-format dots to spaces so the grammar's
|
|
101
|
+
# circ_supplement_identifier rule can match.
|
|
102
|
+
def normalize_lcirc_supplement_contexts!
  rules = [
    # "NBS LCIRC" followed later by a supplement/revision-slash marker is
    # joined with a dot.
    [/\bNBS LCIRC\b(?=.*\b(?:supp?|sup\+|r\d+\/)\d)/, "NBS.LCIRC"],
    # Dotted revision forms are spaced out again, slash-year variant first.
    [/\bNBS\.LCIRC\.(\d+r\d+\/\d{4})/, "NBS LCIRC \\1"],
    [/\bNBS\.LCIRC\.(\d+r\d+)\b/, "NBS LCIRC \\1"],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
|
|
111
|
+
|
|
112
|
+
# Separate revision markers from adjacent digits. LCIRC and CIRC
|
|
113
|
+
# series keep their compact revision form because their grammar
|
|
114
|
+
# rules expect it.
|
|
115
|
+
def normalize_revision_spacing!
  text = @cleaned.gsub(/([-\d]+[IVX]+[-\d]+)\s+(\d+)/, '\1.\2')
  text = text.gsub(/(?<!e)(\d)(rev\d{4})/, '\1 \2')
  # Fold a dotted edition-date ("44e2.Apr2010") back into the "rev" form so
  # previously rendered output can be parsed again.
  @cleaned = text.gsub(/(\d+e\d+)\.([A-Za-z]{3,9}\d{4})/, '\1rev\2')

  # IR "rNN/YY" is rewritten to an update marker first, so the generic
  # slash rule below never sees it.
  normalize_ir_slash_year_to_update!

  # CIRC and LCIRC keep the compact "r1/1925" form ("LCIRC" contains "CIRC",
  # so a single substring check covers both).
  unless @cleaned.include?("CIRC")
    @cleaned = @cleaned.gsub(/(\d)(r\d+\/\d{4})/, '\1 \2')
  end

  spaced = @cleaned.gsub(/\b(r(?!v)\d{4})\b/, ' \1')
  @cleaned = spaced.gsub(/(\d)(r[A-Z][a-z]{2,8}\d{4})/, '\1 \2')
end
|
|
131
|
+
|
|
132
|
+
# "5058r04/98" → "5058/Upd1-199804" (mirrors archived v1 NistIr
|
|
133
|
+
# parser). A 2-digit year normalizes to 19YY. Only applies to IR
|
|
134
|
+
# series; CIRC/LCIRC keep their slash-year form.
|
|
135
|
+
def normalize_ir_slash_year_to_update!
  # Applies to IR identifiers only; anything mentioning CIRC (which also
  # covers LCIRC) is excluded.
  return if @cleaned.include?("CIRC")
  return unless @cleaned.match?(/\bIR\b/)

  @cleaned = @cleaned.gsub(%r{(\d)r(\d{1,2})/(\d{2,4})}) do
    hit = Regexp.last_match
    # Two-digit years are taken to be 19YY; longer values pass through.
    year = hit[3].length == 2 ? "19#{hit[3]}" : hit[3]
    "#{hit[1]}/Upd1-#{year}#{format('%02d', hit[2].to_i)}"
  end
end
|
|
144
|
+
|
|
145
|
+
# Uppercase lone letter suffixes attached to numbers. NCSTAR keeps
|
|
146
|
+
# lowercase volume letters (e.g. "1-1av1") per its grammar.
|
|
147
|
+
def normalize_letter_suffix_casing!
  # Unconditional casing passes, in their original order.
  %i[
    uppercase_dash_letter!
    uppercase_trailing_letter!
    uppercase_revision_letter!
    uppercase_letter_before_revision!
  ].each { |step| send(step) }

  # NCSTAR identifiers keep the lowercase letter in front of a volume.
  uppercase_letter_before_volume! unless @cleaned.include?("NCSTAR")
end
|
|
154
|
+
|
|
155
|
+
# Trailing "-a" → "-A" at end of identifier.
|
|
156
|
+
def uppercase_dash_letter!
  # The match is "<digit>-<letter>"; upcasing the whole match only affects
  # the letter.
  @cleaned = @cleaned.gsub(/(\d)-([a-z])$/) { |hit| hit.upcase }
end
|
|
159
|
+
|
|
160
|
+
# Trailing "a" → "A" when attached directly to a digit (excludes
|
|
161
|
+
# "r" to preserve revision+year patterns like "73-197r").
|
|
162
|
+
def uppercase_trailing_letter!
  # The match is "<digit><letter>" with "r" excluded by the character class;
  # upcasing the whole match only affects the letter.
  @cleaned = @cleaned.gsub(/(\d)([a-z&&[^r]])$/) { |hit| hit.upcase }
end
|
|
165
|
+
|
|
166
|
+
# Letter suffix on revision: "22r1a" → "22r1A".
|
|
167
|
+
def uppercase_revision_letter!
  @cleaned = @cleaned.gsub(/(\d)(r)(\d+)([a-z])$/) do
    digit, marker, revision, letter = Regexp.last_match.captures
    # Only the trailing letter changes case; the "r" marker stays lowercase.
    [digit, marker, revision, letter.upcase].join
  end
end
|
|
172
|
+
|
|
173
|
+
# Letter between number and revision: "53ar1" → "53Ar1".
|
|
174
|
+
def uppercase_letter_before_revision!
  @cleaned = @cleaned.gsub(/(\d)([a-z])(r\d)/) do
    hit = Regexp.last_match
    # Upcase the single letter sitting between the number and "r<digit>".
    hit[1] + hit[2].upcase + hit[3]
  end
end
|
|
179
|
+
|
|
180
|
+
# Letter between number and volume: "1-2bv1" → "1-2Bv1". Skipped
|
|
181
|
+
# for NCSTAR which preserves lowercase letters per its grammar.
|
|
182
|
+
def uppercase_letter_before_volume!
  @cleaned = @cleaned.gsub(/(\d)([a-z&&[^r]])(v\d+)/) do
    hit = Regexp.last_match
    # Upcase the single letter (never "r") between the number and "v<digits>".
    hit[1] + hit[2].upcase + hit[3]
  end
end
|
|
187
|
+
|
|
188
|
+
# Volume/draft spacing and supplement typo fixes that must run
|
|
189
|
+
# before the more general draft and supplement normalizations.
|
|
190
|
+
def normalize_draft_and_volume!
  rules = [
    # "NN-NNNN <digit>" at the end: the lone digit becomes a volume.
    [/(\d{2}-\d{4})\s+(\d)$/, '\1 v\2'],
    # Draft marker wedged between digits, with or without a dash.
    [/(\d)-draft(\d)/, '\1 -draft \2'],
    [/(\d)draft(\d)/, '\1 -draft \2'],
    # "suprev" spelling is rewritten to "supprev".
    [/(\d)suprev/, '\1supprev'],
    # Letter-suffixed number with revision and draft: detach the revision.
    [/(\d{2,})([A-Z])(r\d+)([-\s]draft\d*)/, '\1\2 \3\4'],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
|
|
199
|
+
|
|
200
|
+
# Roman numeral volumes → "v<arabic> ver<version>" per NIST spec.
|
|
201
|
+
def convert_roman_volumes!
  @cleaned = @cleaned.gsub(/(\d+)-([IVX]+)-(\d+(?:\.\d+)*)/) do
    number, roman, version = Regexp.last_match.captures
    # "<number>-<ROMAN>-<version>" becomes "<number> v<arabic> ver<version>".
    "#{number} v#{roman_to_arabic(roman)} ver#{version}"
  end
end
|
|
207
|
+
|
|
208
|
+
# LCIRC supplement with slash-year separator, and "Pt" part prefix
|
|
209
|
+
# with revision.
|
|
210
|
+
def normalize_supplement_and_part!
  # "145supp1/1925" -> "145 supp1/1925"
  spaced = @cleaned.gsub(/(\d)(supp\d+\/\d{4})/, '\1 \2')
  # "800Pt1r2" -> "800 pt1 r2"
  @cleaned = spaced.gsub(/(\d)Pt(\d+)(r\d+)/, '\1 pt\2 \3')
end
|
|
214
|
+
|
|
215
|
+
# Version notation: insert spaces between digits and "ver" / "v",
|
|
216
|
+
# split combined fields, normalize volume ranges.
|
|
217
|
+
def normalize_version_notation!
  text = @cleaned
  # Space out "ver" wedged between digits, then split fields fused onto it.
  text = text.gsub(/(\d)ver(\d)/, '\1 ver \2')
  text = text.gsub(/ver(\d+)e(\d{4})/, 'ver\1 e\2')
  text = text.gsub(/ver(\d+)v(\d+)/, 'ver\1 v\2')
  # Dotted volume attached to a digit. The rule runs twice on purpose: gsub
  # matches never overlap, so in "5v1.2v3.4" the second "v" is only separated
  # on the second pass.
  text = text.gsub(/(\d)(v\d+\.\d+)/, '\1 \2')
  text = text.gsub(/(\d)(v\d+\.\d+)/, '\1 \2')
  # Trailing space-separated numbers after a volume are folded into dots.
  text = text.gsub(/(\d)(v\d+)\s+(\d+)$/, '\1 \2.\3')
  text = text.gsub(/(\d)(v\d+)\s+(\d+)\s+(\d+)$/, '\1 \2.\3.\4')
  # Volume with a letter or letter range attached to a digit.
  text = text.gsub(/(\d)(v\d+[a-z]-[a-z])/, '\1 \2')
  text = text.gsub(/(\d)(v\d+[A-Z])/, '\1 \2')
  # Lowercase an uppercase volume letter range ("v2A-B" -> "v2a-b"). The
  # letters must be downcased inside a block: calling #downcase on the
  # replacement template '\1\2-\3' changes nothing, because the template
  # itself contains no letters.
  @cleaned = text.gsub(/(v\d+)([A-Z])-([A-Z])/) do
    hit = Regexp.last_match
    "#{hit[1]}#{hit[2].downcase}-#{hit[3].downcase}"
  end
end
|
|
229
|
+
|
|
230
|
+
# Edition year suffix shorthand: "2006ed." → "e2006".
|
|
231
|
+
def normalize_edition_year_suffix!
  # Move the "ed." suffix in front of its year as an "e" prefix.
  @cleaned = @cleaned.gsub(/(\d{4})ed\./) { "e#{Regexp.last_match(1)}" }
end
|
|
234
|
+
|
|
235
|
+
# Revision attached to a number with optional letter suffix. When
|
|
236
|
+
# a letter suffix is present, keep them together for the
|
|
237
|
+
# second_number grammar rule; otherwise insert a space before
|
|
238
|
+
# following uppercase letters or update keywords.
|
|
239
|
+
def normalize_revision_with_letter!
  # Revision followed by a lowercase letter suffix: keep them attached and
  # upcase the letter ("22r1a" -> "22r1A").
  with_suffix = @cleaned.gsub(/(\d+)(r\d{1,2})([a-z])(?=-|[A-Z]|$)/) do
    number, revision, letter = Regexp.last_match.captures
    "#{number}#{revision}#{letter.upcase}"
  end

  # Revision without a letter suffix: detach it from the number when a
  # "-<Uppercase>" or "/upd|errata|insert" follows.
  # NOTE(review): the bare [A-Z] lookahead alternative can never succeed
  # because (?![a-zA-Z]) rejects any letter at the same position.
  # rubocop:disable Layout/LineLength
  @cleaned = with_suffix.gsub(/(\d+)(r\d{1,2})(?![a-zA-Z])(?=[A-Z]|-(?=[A-Z])|\/(?:upd|errata|insert))/, '\1 \2')
  # rubocop:enable Layout/LineLength
end
|
|
250
|
+
|
|
251
|
+
# Dotted versions with internal spaces ("v1 1" → "v1.1"). Negative
|
|
252
|
+
# lookahead prevents swallowing draft stage digits ("189 2pd").
|
|
253
|
+
def normalize_version_dotted_spaces!
  # Three-component form first ("v1 2 3" -> "v1.2.3"), then two-component
  # ("v1 1" -> "v1.1"). In both, a number directly followed by a
  # pd/wd/prd stage marker is not absorbed.
  # rubocop:disable Layout/LineLength
  three_part = /(\b(?:v|\d)[v\d]*[-A-Z]*)\s+(\d+)(?!(?i:pd|wd|prd)\b)\s+(\d+)(?!(?i:pd|wd|prd)\b)/
  two_part = /(\b(?:v|\d)[v\d]*)\s+(\d+)(?!(?i:pd|wd|prd)\b)/
  # rubocop:enable Layout/LineLength
  @cleaned = @cleaned.gsub(three_part, '\1.\2.\3').gsub(two_part, '\1.\2')
end
|
|
259
|
+
|
|
260
|
+
# Update markers ("-upd", "/upd") need a space before them so the
|
|
261
|
+
# grammar's update rule can match.
|
|
262
|
+
def normalize_update_markers!
  rules = [
    # Update marker directly after digits.
    [/(\d+)-upd(\d*)/, '\1 -upd\2'],
    [/(\d+)\/upd(\d*)/, '\1 /upd\2'],
    # Update marker after a lowercase-letter+digits token (e.g. "r4").
    [/([a-z]\d+)-upd/, '\1 -upd'],
    [/([a-z]\d+)\/upd/, '\1 /upd'],
    # Update marker after a number with an uppercase letter suffix.
    [/(\d+[A-Z])-upd(\d*)/, '\1 -upd\2'],
    [/(\d+[A-Z])\/upd(\d*)/, '\1 /upd\2'],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
|
|
270
|
+
|
|
271
|
+
# Supplement prefix variants ("sup", "sup+", "supp") all need a
|
|
272
|
+
# space before them; the "sup" form is normalized to "supp" when
|
|
273
|
+
# attached to a letter suffix or slash-year.
|
|
274
|
+
def normalize_supplement_variants!
  rules = [
    # Detach a supplement marker glued to the preceding digit.
    [/(\d)(sup\d)/, '\1 \2'],
    # "sup+" here is a quantifier ("su" followed by one or more "p"), so this
    # covers "supp<digit>"; the literal "sup+<digit>" form is the next rule.
    [/(\d)(sup+)(\d)/, '\1 \2\3'],
    [/(\d)(sup\+)(\d)/, '\1 \2\3'],
    [/(\d)(sup\d+)/, '\1 \2'],
    [/(\d)(sup\d+\b)/, '\1 \2'],
    # Canonical "supp" spelling after a letter suffix, before a slash-year,
    # and in the dash-year form (where the dash is dropped as well).
    [/(\d+[A-Z])sup(\b)/, '\1supp\2'],
    [/(\d+)sup(\d+\/\d{4})/, '\1supp\2'],
    [/(\d)(supp?)-(\d{4})(?![\d\/])/, '\1supp\3'],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
|
|
284
|
+
|
|
285
|
+
# Standalone "r" between number-letter and revision, bare trailing
|
|
286
|
+
# "r" → "r1" (V1 empty-revision normalization), and revision
|
|
287
|
+
# directly followed by a language code.
|
|
288
|
+
def normalize_revision_language!
  # "53ar" -> "53a r": a bare "r" after a number+letter is split off.
  text = @cleaned.gsub(/(\d[a-z])r\b/, '\1 r')
  # A bare "r" ending the whole string gets an explicit revision number 1.
  text = text.gsub(/(\d)r\z/, '\1r1')
  # Revision immediately followed by a language code is spaced apart.
  @cleaned = text.gsub(
    /(r\d+)(es|pt|chi|viet|port|esp)\b/, '\1 \2'
  )
end
|
|
295
|
+
|
|
296
|
+
# MR-format translation codes (".spa", ".por", ".ind") would be
|
|
297
|
+
# misparsed as letter suffixes — convert the trailing dot to a space.
|
|
298
|
+
def normalize_mr_translation_codes!
  # In both shapes the final ".xx" (2-4 lowercase letters) is detached with a
  # space; the SP-specific pattern is tried before the generic series one.
  sp_form = /^([A-Z]+)\.SP\.(\d+)\.([a-z]{2,4})$/
  any_series_form = /^([A-Z]+)\.([A-Z]+)\.(\d+)\.([a-z]{2,4})$/
  @cleaned = @cleaned
    .gsub(sp_form, '\1.SP.\2 \3')
    .gsub(any_series_form, '\1.\2.\3 \4')
end
|
|
306
|
+
|
|
307
|
+
# Trailing "-YYYY" → "eYYYY" edition marker, but only when the
|
|
308
|
+
# four-digit group is plausibly a year (1901–2099). Part numbers
|
|
309
|
+
# outside that range (e.g. SP 250-1039) are left untouched.
|
|
310
|
+
#
|
|
311
|
+
# The letter suffix may be lower- or uppercase (e.g. SP 800-38b-2005);
|
|
312
|
+
# it is upcased so the year edition splits off cleanly and the letter
|
|
313
|
+
# becomes a Part component ("800-38Be2005"), matching how a letter
|
|
314
|
+
# suffix without a year (800-38a → 800-38A) is already normalized.
|
|
315
|
+
# "e"/"E" are excluded from the letter so they cannot be confused with
|
|
316
|
+
# the edition marker itself.
|
|
317
|
+
def convert_dashyear_to_edition!
  dash_year = /(?<!e\d)(?<![eE-])(\d(?:[A-DF-Za-df-z]?))-(\d{4})(?=\s|$)/
  @cleaned = @cleaned.gsub(dash_year) do |whole|
    lead = Regexp.last_match(1)
    year = Regexp.last_match(2).to_i
    # Values outside 1901..2099 are treated as part numbers and left as-is.
    next whole unless year.between?(1901, 2099)

    # Upcasing the lead promotes a lowercase letter suffix ("38b" -> "38B").
    "#{lead.upcase}e#{year}"
  end
end
|
|
326
|
+
|
|
327
|
+
# Series-specific reverts: HB handbooks, OWMWP dates, and RPT year
|
|
328
|
+
# ranges use dash-year structurally (not as an edition marker), so
|
|
329
|
+
# the broad convert_dashyear_to_edition! rule would corrupt them.
|
|
330
|
+
def revert_dashyear_for_series!
  # One revert per series, run in a fixed order; the value of the last one
  # is returned.
  %i[
    revert_handbook_edition!
    revert_owmwp_date!
    revert_report_year_range!
  ].map { |revert| send(revert) }.last
end
|
|
335
|
+
|
|
336
|
+
# HB handbooks: "HB 130e1979" → "HB 130-1979" (year is part of
|
|
337
|
+
# the handbook designation, not an edition marker).
|
|
338
|
+
def revert_handbook_edition!
  # The lazy filler between "HB" and the final number (for example "105-" in
  # "HB 105-1e1990") is captured and written back. Left uncaptured it would
  # be matched and then silently dropped, turning "HB 105-1e1990" into
  # "HB 1-1990". With an empty filler the result is unchanged from before.
  @cleaned = @cleaned.gsub(
    /\b(HB|HB\s+)([^:\s.]*?)(\d+)e(\d{4})(?=\s|$)/, '\1\2\3-\4'
  )
end
|
|
343
|
+
|
|
344
|
+
# OWMWP series: dates use MM-DD-YYYY format, so "OWMWP 06-13e2018"
|
|
345
|
+
# reverts to "OWMWP 06-13-2018".
|
|
346
|
+
def revert_owmwp_date!
  # The lazy filler between "OWMWP" and the date (for example the "." of the
  # dotted form "NIST.OWMWP.06-13e2018") is captured and written back. Left
  # uncaptured it would be matched and then dropped, producing
  # "NIST.OWMWP06-13-2018". With an empty filler the result is unchanged.
  @cleaned = @cleaned.gsub(
    /\b(OWMWP|OWMWP\s*)([^:\s]*?)(\d{2})-(\d{2})e(\d{4})(?=\s|$)/,
    '\1\2\3-\4-\5',
  )
end
|
|
352
|
+
|
|
353
|
+
# RPT series: year ranges "1946-1947" should not be reinterpreted as
|
|
354
|
+
# editions. Only revert when the first year precedes the second.
|
|
355
|
+
def revert_report_year_range!
  # Captures: series prefix, optional filler, first year, second year.
  year_pair = /\b(RPT|RPT\s*)([^:\s]*?)(\d{4})e(\d{4})(?=\s|$)/
  @cleaned = @cleaned.gsub(year_pair) do |matched|
    build_report_year_range(matched, Regexp.last_match.captures)
  end
end
|
|
360
|
+
|
|
361
|
+
# Build the reverted year-range form from the gsub captures, or
|
|
362
|
+
# return the original match when the years are not a forward range.
|
|
363
|
+
def build_report_year_range(match, captures)
  lead, filler, from_year, to_year = captures
  if from_year.to_i < to_year.to_i
    # Forward range: put the dash back between the two years.
    "#{lead}#{filler}#{from_year}-#{to_year}"
  else
    # Not a forward range: hand the matched text back untouched.
    match
  end
end
|
|
369
|
+
|
|
370
|
+
# Verbose version markers ("v1.1" → "ver1.1", "Ver. 2.0" →
|
|
371
|
+
# "ver2.0"), MR-format "-v" → ".ver".
|
|
372
|
+
def normalize_version_verbose!
  rules = [
    # "-v1.2" -> ".ver1.2"
    [/-v(\d+\.\d+)/, '.ver\1'],
    # "Ver. 2.0" -> "ver2.0"
    [/\bVer\.\s+(\d+(?:\.\d+)*)/, 'ver\1'],
    # "v1.1" -> "ver1.1" (dotted versions only)
    [/\bv(\d+\.\d+(?:\.\d+)*)/, 'ver\1'],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
|
|
377
|
+
|
|
378
|
+
# Part notation: uppercase "P" → "p"; lone "p"/"n" → "pt" (unless
|
|
379
|
+
# followed by a 4-digit year, which is part+year not part-prefix).
|
|
380
|
+
def normalize_part_notation!
  # "800P1" -> "800 p1"
  text = @cleaned.gsub(/(\d)P(\d)/, '\1 p\2')
  # Standalone "p<digits>" / "n<digits>" -> "pt<digits>".
  # NOTE(review): \d+ is greedy, so the (?!\d{4}\b) lookahead is only tested
  # once no digit follows and therefore never rejects a match - confirm
  # whether a year guard was meant to sit before the digits instead.
  text = text.gsub(/\b([pn])(\d+)(?!\d{4}\b)/, 'pt\2')
  # A part marker still glued to a digit is spaced apart.
  @cleaned = text.gsub(/(\d)([pP]\d+)/, '\1 \2')
end
|
|
385
|
+
|
|
386
|
+
# Series-specific spacing rules: CRPL-F needs a space after the
|
|
387
|
+
# letter band; compound report numbers ("17-917v3") need the
|
|
388
|
+
# volume broken out.
|
|
389
|
+
def normalize_series_specific_spacing!
  rules = [
    # CRPL-F band letter (A or B) followed directly by a digit, with and
    # without the NBS publisher in front.
    [/(NBS CRPL-F-[AB])(\d)/, '\1 \2'],
    [/(CRPL-F-[AB])(\d)/, '\1 \2'],
    # Compound number with an undotted volume: "17-917v3" -> "17-917 v3".
    [/(\d+-\d+)(v\d+)(?![.\d])/, '\1 \2'],
  ]
  @cleaned = rules.reduce(@cleaned) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end
|
|
394
|
+
|
|
395
|
+
# Verbose keyword spellings ("Version", "Revision", "Part", "Add",
|
|
396
|
+
# "Suppl", "report") normalized to their short canonical forms.
|
|
397
|
+
def normalize_verbose_keywords!
  text = @cleaned
  # "44 Suppl" -> "44Suppl"
  text = text.gsub(/(\d+)\s+Suppl\b/, '\1Suppl')
  # " Version 2" -> " ver 2"
  text = text.gsub(/\s+Version\s+(\d+)/, ' ver \1')
  # " Revision (r)" -> " r"
  text = text.gsub(/\s+Revision\s+\(r\)/, " r")
  # " Part 2" -> "pt2" (the leading whitespace is consumed)
  text = text.gsub(/\s+Part\s+(\d+)/, 'pt\1')
  # "53a Add" / "53a add." -> "53A Add." (case-insensitive match)
  text = text.gsub(/(\d[a-z]?)\s+Add\b\.?/i) do
    "#{Regexp.last_match(1).upcase} Add."
  end
  # "800 rev 2005" -> "800r2005"
  text = text.gsub(/(\d+)\s+rev\s+(\d{4})/, '\1r\2')
  # "report;" (with any surrounding spaces) and bare "report" -> "RPT"
  text = text.gsub(/\breport\s*;\s*/, "RPT ")
  @cleaned = text.gsub(/\breport\b/, "RPT")
end
|
|
409
|
+
|
|
410
|
+
# Translate a Roman numeral into its Arabic equivalent.
|
|
411
|
+
def roman_to_arabic(roman)
  # Numerals missing from the table are returned unchanged.
  ROMAN_TO_ARABIC.fetch(roman) { roman }
end
|
|
414
|
+
end
|
|
415
|
+
end
|
|
416
|
+
end
|