pubid 2.0.0.pre.alpha.1 → 2.0.0.pre.alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +5 -1
- data/data/nist/update_codes.yaml +27 -0
- data/lib/pubid/amca/builder.rb +2 -2
- data/lib/pubid/amca/identifier.rb +7 -0
- data/lib/pubid/amca/identifiers/base.rb +0 -26
- data/lib/pubid/amca/identifiers/interpretation.rb +0 -17
- data/lib/pubid/amca/identifiers/publication.rb +0 -13
- data/lib/pubid/amca/renderer.rb +82 -0
- data/lib/pubid/amca/single_identifier.rb +0 -23
- data/lib/pubid/amca/urn_parser.rb +28 -0
- data/lib/pubid/amca.rb +42 -1
- data/lib/pubid/ansi/builder.rb +5 -3
- data/lib/pubid/ansi/identifier.rb +1 -1
- data/lib/pubid/ansi/identifiers/american_national_standard.rb +2 -1
- data/lib/pubid/ansi/identifiers/standard.rb +2 -3
- data/lib/pubid/ansi/renderer.rb +53 -0
- data/lib/pubid/ansi/single_identifier.rb +2 -31
- data/lib/pubid/ansi/urn_generator.rb +3 -38
- data/lib/pubid/ansi/urn_parser.rb +23 -0
- data/lib/pubid/ansi.rb +38 -3
- data/lib/pubid/api/builder.rb +29 -74
- data/lib/pubid/api/identifier.rb +0 -4
- data/lib/pubid/api/identifiers/base.rb +0 -2
- data/lib/pubid/api/identifiers/bulletin.rb +0 -2
- data/lib/pubid/api/identifiers/continuous_operations_standard.rb +0 -2
- data/lib/pubid/api/identifiers/mpms.rb +1 -17
- data/lib/pubid/api/identifiers/publication.rb +0 -2
- data/lib/pubid/api/identifiers/recommended_practice.rb +0 -2
- data/lib/pubid/api/identifiers/specification.rb +0 -2
- data/lib/pubid/api/identifiers/standard.rb +0 -2
- data/lib/pubid/api/identifiers/technical_report.rb +0 -2
- data/lib/pubid/api/identifiers/typeless_standard.rb +1 -14
- data/lib/pubid/api/identifiers.rb +18 -0
- data/lib/pubid/api/renderer.rb +89 -0
- data/lib/pubid/api/single_identifier.rb +1 -13
- data/lib/pubid/api/urn_generator.rb +0 -18
- data/lib/pubid/api/urn_parser.rb +35 -0
- data/lib/pubid/api.rb +51 -5
- data/lib/pubid/ashrae/builder.rb +3 -3
- data/lib/pubid/ashrae/identifier.rb +6 -0
- data/lib/pubid/ashrae/identifiers/addenda_package.rb +0 -10
- data/lib/pubid/ashrae/identifiers/addendum.rb +0 -19
- data/lib/pubid/ashrae/identifiers/base.rb +3 -0
- data/lib/pubid/ashrae/identifiers/combined_addenda.rb +0 -15
- data/lib/pubid/ashrae/identifiers/errata.rb +0 -10
- data/lib/pubid/ashrae/identifiers/interpretation.rb +0 -10
- data/lib/pubid/ashrae/renderer.rb +117 -0
- data/lib/pubid/ashrae/single_identifier.rb +0 -13
- data/lib/pubid/ashrae/urn_generator.rb +0 -8
- data/lib/pubid/ashrae/urn_parser.rb +27 -0
- data/lib/pubid/ashrae.rb +42 -1
- data/lib/pubid/asme/components/code.rb +10 -2
- data/lib/pubid/asme/identifiers/base.rb +0 -60
- data/lib/pubid/asme/renderer.rb +66 -0
- data/lib/pubid/asme/urn_parser.rb +31 -0
- data/lib/pubid/asme.rb +42 -1
- data/lib/pubid/astm/components/code.rb +9 -0
- data/lib/pubid/{jis → astm}/components.rb +1 -1
- data/lib/pubid/astm/identifiers/adjunct.rb +0 -8
- data/lib/pubid/astm/identifiers/data_series.rb +0 -14
- data/lib/pubid/astm/identifiers/iso_dual_published.rb +9 -34
- data/lib/pubid/astm/identifiers/manual.rb +0 -27
- data/lib/pubid/astm/identifiers/monograph.rb +0 -14
- data/lib/pubid/astm/identifiers/research_report.rb +0 -7
- data/lib/pubid/astm/identifiers/standard.rb +0 -39
- data/lib/pubid/astm/identifiers/technical_report.rb +0 -13
- data/lib/pubid/astm/identifiers/work_in_progress.rb +0 -11
- data/lib/pubid/astm/identifiers.rb +18 -0
- data/lib/pubid/astm/renderer.rb +172 -0
- data/lib/pubid/astm/single_identifier.rb +0 -10
- data/lib/pubid/astm/urn_parser.rb +30 -0
- data/lib/pubid/astm.rb +39 -27
- data/lib/pubid/bsi/builder.rb +21 -12
- data/lib/pubid/bsi/identifier.rb +8 -2
- data/lib/pubid/bsi/identifiers/addendum_document.rb +3 -33
- data/lib/pubid/bsi/identifiers/adopted_european_norm.rb +11 -47
- data/lib/pubid/bsi/identifiers/adopted_international_standard.rb +11 -38
- data/lib/pubid/bsi/identifiers/aerospace_standard.rb +3 -53
- data/lib/pubid/bsi/identifiers/amendment.rb +3 -19
- data/lib/pubid/bsi/identifiers/british_industrial_practice.rb +2 -4
- data/lib/pubid/bsi/identifiers/british_standard.rb +2 -1
- data/lib/pubid/bsi/identifiers/bundled_identifier.rb +3 -84
- data/lib/pubid/bsi/identifiers/committee_document.rb +1 -14
- data/lib/pubid/bsi/identifiers/consolidated_identifier.rb +3 -84
- data/lib/pubid/bsi/identifiers/corrigendum.rb +3 -7
- data/lib/pubid/bsi/identifiers/detailed_specification.rb +1 -34
- data/lib/pubid/bsi/identifiers/disc.rb +1 -27
- data/lib/pubid/bsi/identifiers/draft_document.rb +3 -44
- data/lib/pubid/bsi/identifiers/electronic_book.rb +3 -36
- data/lib/pubid/bsi/identifiers/expert_commentary.rb +3 -15
- data/lib/pubid/bsi/identifiers/explanatory_supplement.rb +1 -45
- data/lib/pubid/bsi/identifiers/flex.rb +1 -33
- data/lib/pubid/bsi/identifiers/handbook.rb +2 -13
- data/lib/pubid/bsi/identifiers/index.rb +1 -30
- data/lib/pubid/bsi/identifiers/method.rb +1 -39
- data/lib/pubid/bsi/identifiers/national_annex.rb +5 -27
- data/lib/pubid/bsi/identifiers/practice_guide.rb +2 -4
- data/lib/pubid/bsi/identifiers/publicly_available_specification.rb +3 -52
- data/lib/pubid/bsi/identifiers/published_document.rb +3 -52
- data/lib/pubid/bsi/identifiers/section.rb +1 -28
- data/lib/pubid/bsi/identifiers/set.rb +3 -17
- data/lib/pubid/bsi/identifiers/standalone_amendment.rb +1 -7
- data/lib/pubid/bsi/identifiers/supplement_document.rb +3 -21
- data/lib/pubid/bsi/identifiers/supplementary_index.rb +1 -44
- data/lib/pubid/bsi/identifiers/technical_specification.rb +3 -45
- data/lib/pubid/bsi/identifiers/test_method.rb +1 -30
- data/lib/pubid/bsi/identifiers/value_added_publication.rb +3 -14
- data/lib/pubid/bsi/identifiers.rb +0 -1
- data/lib/pubid/bsi/renderer.rb +1050 -0
- data/lib/pubid/bsi/single_identifier.rb +6 -70
- data/lib/pubid/bsi/urn_generator.rb +2 -3
- data/lib/pubid/bsi/urn_parser.rb +52 -0
- data/lib/pubid/bsi.rb +224 -1
- data/lib/pubid/builder/base.rb +57 -10
- data/lib/pubid/bundled_identifier.rb +0 -1
- data/lib/pubid/ccsds/builder.rb +4 -3
- data/lib/pubid/ccsds/identifier.rb +66 -1
- data/lib/pubid/ccsds/identifiers/base.rb +11 -50
- data/lib/pubid/ccsds/identifiers/corrigendum.rb +7 -6
- data/lib/pubid/ccsds/parser.rb +4 -2
- data/lib/pubid/ccsds/single_identifier.rb +4 -1
- data/lib/pubid/ccsds/supplement_identifier.rb +15 -11
- data/lib/pubid/ccsds/urn_generator.rb +3 -3
- data/lib/pubid/ccsds/urn_parser.rb +20 -0
- data/lib/pubid/ccsds.rb +39 -1
- data/lib/pubid/cen_cenelec/builder.rb +12 -14
- data/lib/pubid/cen_cenelec/identifier.rb +8 -2
- data/lib/pubid/cen_cenelec/identifiers/adopted_european_norm.rb +13 -4
- data/lib/pubid/cen_cenelec/identifiers/amendment.rb +2 -8
- data/lib/pubid/cen_cenelec/identifiers/base.rb +5 -41
- data/lib/pubid/cen_cenelec/identifiers/cen_report.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/cen_workshop_agreement.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/consolidated_identifier.rb +2 -25
- data/lib/pubid/cen_cenelec/identifiers/corrigendum.rb +2 -13
- data/lib/pubid/cen_cenelec/identifiers/european_norm.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/european_prestandard.rb +4 -7
- data/lib/pubid/cen_cenelec/identifiers/european_specification.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/fragment.rb +2 -2
- data/lib/pubid/cen_cenelec/identifiers/harmonization_document.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/technical_report.rb +2 -1
- data/lib/pubid/cen_cenelec/identifiers/technical_specification.rb +2 -1
- data/lib/pubid/cen_cenelec/renderer.rb +261 -0
- data/lib/pubid/cen_cenelec/single_identifier.rb +11 -89
- data/lib/pubid/cen_cenelec/urn_generator.rb +6 -6
- data/lib/pubid/cen_cenelec/urn_parser.rb +28 -0
- data/lib/pubid/cen_cenelec.rb +168 -1
- data/lib/pubid/cie/components/code.rb +8 -0
- data/lib/pubid/cie/identifier.rb +6 -4
- data/lib/pubid/cie/urn_parser.rb +28 -0
- data/lib/pubid/cie.rb +43 -1
- data/lib/pubid/components/adoption.rb +104 -0
- data/lib/pubid/components/code.rb +22 -8
- data/lib/pubid/components/date.rb +23 -16
- data/lib/pubid/components/edition.rb +9 -6
- data/lib/pubid/components/iteration.rb +32 -0
- data/lib/pubid/components/language.rb +6 -4
- data/lib/pubid/components/locality.rb +10 -1
- data/lib/pubid/components/publisher.rb +9 -6
- data/lib/pubid/components/relationship.rb +151 -0
- data/lib/pubid/components/stage.rb +5 -14
- data/lib/pubid/components/supplement.rb +184 -0
- data/lib/pubid/components/type.rb +5 -15
- data/lib/pubid/components/typed_stage.rb +11 -8
- data/lib/pubid/components.rb +4 -0
- data/lib/pubid/core/update_codes.rb +28 -7
- data/lib/pubid/csa/identifier.rb +0 -3
- data/lib/pubid/csa/identifiers/base.rb +2 -122
- data/lib/pubid/csa/identifiers/cec.rb +2 -101
- data/lib/pubid/csa/identifiers/series.rb +2 -102
- data/lib/pubid/csa/renderer.rb +292 -0
- data/lib/pubid/csa/urn_generator.rb +1 -1
- data/lib/pubid/csa/urn_parser.rb +33 -0
- data/lib/pubid/csa.rb +42 -1
- data/lib/pubid/etsi/components/code.rb +9 -2
- data/lib/pubid/etsi/identifiers/base.rb +1 -4
- data/lib/pubid/etsi/identifiers/supplement_identifier.rb +2 -9
- data/lib/pubid/etsi/renderer.rb +42 -0
- data/lib/pubid/etsi/urn_parser.rb +34 -0
- data/lib/pubid/etsi.rb +42 -1
- data/lib/pubid/export/exporter.rb +4 -46
- data/lib/pubid/export/flavor_exporter.rb +111 -278
- data/lib/pubid/export.rb +0 -6
- data/lib/pubid/identifier.rb +10 -18
- data/lib/pubid/identifier_facade.rb +114 -0
- data/lib/pubid/identifier_metadata.rb +1 -1
- data/lib/pubid/idf/builder.rb +3 -3
- data/lib/pubid/idf/identifier.rb +3 -6
- data/lib/pubid/idf/identifiers/amendment.rb +2 -1
- data/lib/pubid/idf/identifiers/corrigendum.rb +2 -1
- data/lib/pubid/idf/identifiers/international_standard.rb +2 -1
- data/lib/pubid/idf/identifiers/reviewed_method.rb +2 -1
- data/lib/pubid/idf/parser.rb +3 -2
- data/lib/pubid/idf/renderer.rb +84 -0
- data/lib/pubid/idf/supplement_identifier.rb +2 -10
- data/lib/pubid/idf/urn_generator.rb +4 -39
- data/lib/pubid/idf/urn_parser.rb +25 -0
- data/lib/pubid/idf.rb +51 -1
- data/lib/pubid/iec/builder.rb +48 -65
- data/lib/pubid/iec/components/code.rb +9 -32
- data/lib/pubid/iec/components/publisher.rb +1 -1
- data/lib/pubid/iec/components.rb +14 -0
- data/lib/pubid/iec/identifier.rb +276 -3
- data/lib/pubid/iec/identifiers/amendment.rb +2 -3
- data/lib/pubid/iec/identifiers/base.rb +8 -28
- data/lib/pubid/iec/identifiers/component_specification.rb +3 -3
- data/lib/pubid/iec/identifiers/conformity_assessment.rb +1 -2
- data/lib/pubid/iec/identifiers/consolidated_identifier.rb +27 -30
- data/lib/pubid/iec/identifiers/corrigendum.rb +2 -3
- data/lib/pubid/iec/identifiers/fragment_identifier.rb +37 -26
- data/lib/pubid/iec/identifiers/guide.rb +0 -2
- data/lib/pubid/iec/identifiers/international_standard.rb +2 -3
- data/lib/pubid/iec/identifiers/interpretation_sheet.rb +2 -3
- data/lib/pubid/iec/identifiers/operational_document.rb +3 -3
- data/lib/pubid/iec/identifiers/publicly_available_specification.rb +2 -3
- data/lib/pubid/iec/identifiers/sheet_identifier.rb +21 -15
- data/lib/pubid/iec/identifiers/societal_technology_trend_report.rb +3 -3
- data/lib/pubid/iec/identifiers/systems_reference_document.rb +2 -3
- data/lib/pubid/iec/identifiers/technical_report.rb +2 -3
- data/lib/pubid/iec/identifiers/technical_specification.rb +2 -3
- data/lib/pubid/iec/identifiers/technology_report.rb +1 -2
- data/lib/pubid/iec/identifiers/test_report_form.rb +5 -34
- data/lib/pubid/iec/identifiers/vap_identifier.rb +26 -23
- data/lib/pubid/iec/identifiers/white_paper.rb +3 -3
- data/lib/pubid/iec/identifiers/working_document.rb +4 -48
- data/lib/pubid/iec/identifiers.rb +30 -0
- data/lib/pubid/iec/parser.rb +20 -14
- data/lib/pubid/iec/renderer.rb +254 -0
- data/lib/pubid/iec/single_identifier.rb +6 -12
- data/lib/pubid/iec/supplement_identifier.rb +58 -54
- data/lib/pubid/iec/urn_generator.rb +57 -171
- data/lib/pubid/iec/urn_parser.rb +53 -252
- data/lib/pubid/iec.rb +40 -68
- data/lib/pubid/ieee/builder.rb +12 -12
- data/lib/pubid/ieee/components/code.rb +8 -0
- data/lib/pubid/ieee/components/draft.rb +14 -0
- data/lib/pubid/ieee/components/relationship.rb +5 -149
- data/lib/pubid/ieee/identifier.rb +6 -0
- data/lib/pubid/ieee/identifiers/adopted_standard.rb +1 -6
- data/lib/pubid/ieee/identifiers/base.rb +101 -458
- data/lib/pubid/ieee/identifiers/conformance_identifier.rb +1 -7
- data/lib/pubid/ieee/identifiers/corrigendum.rb +1 -9
- data/lib/pubid/ieee/identifiers/csa_dual_published.rb +1 -7
- data/lib/pubid/ieee/identifiers/dual_identifier.rb +1 -1
- data/lib/pubid/ieee/identifiers/dual_published.rb +1 -1
- data/lib/pubid/ieee/identifiers/iec_ieee_copublished.rb +1 -6
- data/lib/pubid/ieee/identifiers/interpretation_identifier.rb +1 -7
- data/lib/pubid/ieee/identifiers/joint_development.rb +2 -0
- data/lib/pubid/ieee/identifiers/multi_numbered_identifier.rb +1 -15
- data/lib/pubid/ieee/identifiers/parenthetical_identifier.rb +1 -3
- data/lib/pubid/ieee/identifiers/project_draft_identifier.rb +15 -0
- data/lib/pubid/ieee/identifiers/redlined_standard.rb +1 -4
- data/lib/pubid/ieee/identifiers/si_standard.rb +1 -35
- data/lib/pubid/ieee/identifiers/standard.rb +1 -1
- data/lib/pubid/ieee/pre_parser.rb +301 -0
- data/lib/pubid/ieee/renderer.rb +307 -0
- data/lib/pubid/ieee/urn_parser.rb +34 -0
- data/lib/pubid/ieee.rb +62 -1
- data/lib/pubid/ieee_debug.rb +0 -1
- data/lib/pubid/iho/builder.rb +2 -2
- data/lib/pubid/iho/identifier.rb +8 -0
- data/lib/pubid/iho/identifiers/base.rb +49 -10
- data/lib/pubid/iho/identifiers/bibliographic.rb +0 -4
- data/lib/pubid/iho/identifiers/circular_letter.rb +0 -4
- data/lib/pubid/iho/identifiers/miscellaneous.rb +0 -4
- data/lib/pubid/iho/identifiers/publication.rb +0 -4
- data/lib/pubid/iho/identifiers/standard.rb +0 -4
- data/lib/pubid/iho/parser.rb +3 -3
- data/lib/pubid/iho/renderer.rb +30 -0
- data/lib/pubid/iho/urn_generator.rb +3 -3
- data/lib/pubid/iho/urn_parser.rb +58 -0
- data/lib/pubid/iho.rb +50 -1
- data/lib/pubid/iso/builder.rb +59 -53
- data/lib/pubid/iso/bundled_identifier.rb +51 -0
- data/lib/pubid/iso/components/code.rb +7 -19
- data/lib/pubid/iso/components/publisher.rb +10 -8
- data/lib/pubid/iso/components.rb +2 -4
- data/lib/pubid/iso/identifier.rb +233 -6
- data/lib/pubid/iso/identifiers/addendum.rb +9 -6
- data/lib/pubid/iso/identifiers/amendment.rb +8 -4
- data/lib/pubid/iso/identifiers/corrigendum.rb +4 -4
- data/lib/pubid/iso/identifiers/data.rb +0 -1
- data/lib/pubid/iso/identifiers/directives.rb +8 -2
- data/lib/pubid/iso/identifiers/directives_supplement.rb +43 -14
- data/lib/pubid/iso/identifiers/extract.rb +2 -2
- data/lib/pubid/iso/identifiers/guide.rb +0 -1
- data/lib/pubid/iso/identifiers/international_standard.rb +4 -4
- data/lib/pubid/iso/identifiers/international_standardized_profile.rb +4 -4
- data/lib/pubid/iso/identifiers/international_workshop_agreement.rb +10 -4
- data/lib/pubid/iso/identifiers/pas.rb +2 -2
- data/lib/pubid/iso/identifiers/recommendation.rb +2 -2
- data/lib/pubid/iso/identifiers/supplement.rb +11 -3
- data/lib/pubid/iso/identifiers/tc_document.rb +44 -15
- data/lib/pubid/iso/identifiers/technical_report.rb +4 -4
- data/lib/pubid/iso/identifiers/technical_specification.rb +2 -2
- data/lib/pubid/iso/identifiers/technology_trends_assessments.rb +2 -2
- data/lib/pubid/iso/identifiers.rb +0 -1
- data/lib/pubid/iso/normalizer.rb +89 -0
- data/lib/pubid/iso/parser.rb +26 -6
- data/lib/pubid/iso/single_identifier.rb +6 -3
- data/lib/pubid/iso/supplement_identifier.rb +15 -2
- data/lib/pubid/iso/urn_generator.rb +74 -176
- data/lib/pubid/iso/urn_parser.rb +28 -9
- data/lib/pubid/iso.rb +173 -2
- data/lib/pubid/itu/builder.rb +0 -12
- data/lib/pubid/itu/components/code.rb +8 -0
- data/lib/pubid/itu/components.rb +11 -0
- data/lib/pubid/itu/identifier.rb +6 -39
- data/lib/pubid/itu/identifiers/amendment.rb +0 -2
- data/lib/pubid/itu/identifiers/annex.rb +0 -2
- data/lib/pubid/itu/identifiers/base.rb +0 -6
- data/lib/pubid/itu/identifiers/combined_identifier.rb +0 -2
- data/lib/pubid/itu/identifiers/corrigendum.rb +0 -2
- data/lib/pubid/itu/identifiers/recommendation.rb +0 -2
- data/lib/pubid/itu/identifiers/special_publication.rb +0 -2
- data/lib/pubid/itu/identifiers/supplement.rb +0 -2
- data/lib/pubid/itu/urn_parser.rb +23 -0
- data/lib/pubid/itu.rb +42 -1
- data/lib/pubid/jcgm/builder.rb +16 -8
- data/lib/pubid/jcgm/identifiers/amendment.rb +2 -7
- data/lib/pubid/jcgm/identifiers/gum_guide.rb +2 -10
- data/lib/pubid/jcgm/renderer.rb +68 -0
- data/lib/pubid/jcgm/single_identifier.rb +1 -5
- data/lib/pubid/jcgm/urn_generator.rb +4 -6
- data/lib/pubid/jcgm/urn_parser.rb +23 -0
- data/lib/pubid/jcgm.rb +43 -2
- data/lib/pubid/jis/builder.rb +44 -52
- data/lib/pubid/jis/identifier.rb +132 -3
- data/lib/pubid/jis/identifiers/amendment.rb +1 -1
- data/lib/pubid/jis/identifiers/corrigendum.rb +16 -0
- data/lib/pubid/jis/identifiers/standard.rb +2 -1
- data/lib/pubid/jis/identifiers/technical_report.rb +2 -1
- data/lib/pubid/jis/identifiers/technical_specification.rb +2 -1
- data/lib/pubid/jis/identifiers.rb +1 -1
- data/lib/pubid/jis/parser.rb +31 -5
- data/lib/pubid/jis/renderer.rb +69 -0
- data/lib/pubid/jis/single_identifier.rb +6 -12
- data/lib/pubid/jis/supplement_identifier.rb +17 -14
- data/lib/pubid/jis/urn_parser.rb +23 -0
- data/lib/pubid/jis.rb +42 -2
- data/lib/pubid/nist/builder.rb +122 -1761
- data/lib/pubid/nist/caster.rb +1272 -0
- data/lib/pubid/nist/circular_supplement_builder.rb +291 -0
- data/lib/pubid/nist/components/code.rb +9 -20
- data/lib/pubid/nist/components/edition.rb +16 -0
- data/lib/pubid/nist/components/supplement.rb +88 -21
- data/lib/pubid/nist/components.rb +0 -1
- data/lib/pubid/nist/identifier.rb +25 -0
- data/lib/pubid/nist/identifiers/base.rb +206 -64
- data/lib/pubid/nist/identifiers/circular.rb +7 -2
- data/lib/pubid/nist/identifiers/circular_supplement.rb +3 -2
- data/lib/pubid/nist/identifiers/commercial_standard.rb +2 -1
- data/lib/pubid/nist/identifiers/commercial_standard_emergency.rb +6 -4
- data/lib/pubid/nist/identifiers/commercial_standards_monthly.rb +10 -3
- data/lib/pubid/nist/identifiers/crpl_report.rb +8 -11
- data/lib/pubid/nist/identifiers/dated_document.rb +49 -0
- data/lib/pubid/nist/identifiers/federal_information_processing_standards.rb +17 -16
- data/lib/pubid/nist/identifiers/grant_contractor_report.rb +2 -1
- data/lib/pubid/nist/identifiers/handbook.rb +2 -1
- data/lib/pubid/nist/identifiers/internal_report.rb +2 -1
- data/lib/pubid/nist/identifiers/letter_circular.rb +2 -1
- data/lib/pubid/nist/identifiers/miscellaneous_publication.rb +5 -4
- data/lib/pubid/nist/identifiers/monograph.rb +7 -3
- data/lib/pubid/nist/identifiers/report.rb +4 -3
- data/lib/pubid/nist/identifiers/special_publication.rb +2 -1
- data/lib/pubid/nist/identifiers/technical_note.rb +3 -2
- data/lib/pubid/nist/identifiers.rb +1 -0
- data/lib/pubid/nist/parser.rb +67 -424
- data/lib/pubid/nist/parser_output_normalizer.rb +233 -0
- data/lib/pubid/nist/preprocessor.rb +416 -0
- data/lib/pubid/nist/renderer.rb +43 -0
- data/lib/pubid/nist/router.rb +148 -0
- data/lib/pubid/nist/series/base.rb +58 -0
- data/lib/pubid/nist/series/crpl.rb +13 -0
- data/lib/pubid/nist/series/fips.rb +14 -0
- data/lib/pubid/nist/series/ir.rb +60 -0
- data/lib/pubid/nist/series/letter_preserving.rb +15 -0
- data/lib/pubid/nist/series/mono.rb +19 -0
- data/lib/pubid/nist/series/ncstar.rb +20 -0
- data/lib/pubid/nist/series.rb +49 -0
- data/lib/pubid/nist/supplement_identifier.rb +11 -25
- data/lib/pubid/nist/urn_generator.rb +14 -8
- data/lib/pubid/nist/urn_parser.rb +67 -0
- data/lib/pubid/nist.rb +83 -4
- data/lib/pubid/oiml/components/code.rb +10 -0
- data/lib/pubid/oiml/identifiers/annex.rb +3 -45
- data/lib/pubid/oiml/identifiers/base.rb +2 -17
- data/lib/pubid/oiml/renderer.rb +161 -0
- data/lib/pubid/oiml/single_identifier.rb +6 -45
- data/lib/pubid/oiml/supplement_identifier.rb +4 -19
- data/lib/pubid/oiml/urn_generator.rb +0 -8
- data/lib/pubid/oiml/urn_parser.rb +22 -0
- data/lib/pubid/oiml.rb +42 -1
- data/lib/pubid/plateau/identifier.rb +23 -0
- data/lib/pubid/plateau/identifiers/handbook.rb +1 -3
- data/lib/pubid/plateau/identifiers/technical_report.rb +1 -1
- data/lib/pubid/plateau/renderer.rb +51 -0
- data/lib/pubid/plateau/supplement_identifier.rb +1 -1
- data/lib/pubid/plateau/urn_parser.rb +43 -0
- data/lib/pubid/plateau.rb +44 -1
- data/lib/pubid/renderers/base.rb +34 -0
- data/lib/pubid/renderers/directives_renderer.rb +27 -14
- data/lib/pubid/renderers/guide_renderer.rb +7 -1
- data/lib/pubid/renderers/human_readable.rb +31 -8
- data/lib/pubid/renderers/iwa_renderer.rb +5 -1
- data/lib/pubid/renderers/supplement_renderer.rb +4 -1
- data/lib/pubid/rendering/context.rb +33 -21
- data/lib/pubid/rendering.rb +0 -3
- data/lib/pubid/sae/components/date.rb +8 -0
- data/lib/pubid/sae/components/type.rb +5 -1
- data/lib/pubid/sae/identifiers/base.rb +2 -16
- data/lib/pubid/sae/renderer.rb +36 -0
- data/lib/pubid/sae/urn_generator.rb +2 -10
- data/lib/pubid/sae/urn_parser.rb +36 -0
- data/lib/pubid/sae.rb +42 -1
- data/lib/pubid/urn_generator/base.rb +12 -12
- data/lib/pubid/urn_parser/base.rb +81 -0
- data/lib/pubid/urn_parser/errors.rb +9 -0
- data/lib/pubid/urn_parser.rb +14 -0
- data/lib/pubid/version.rb +1 -1
- data/lib/pubid.rb +29 -7
- data/lib/tasks/website-data.json +1940 -1882
- metadata +77 -43
- data/lib/pubid/amca/scheme.rb +0 -16
- data/lib/pubid/ansi/scheme.rb +0 -15
- data/lib/pubid/api/scheme.rb +0 -66
- data/lib/pubid/ashrae/scheme.rb +0 -53
- data/lib/pubid/asme/scheme.rb +0 -37
- data/lib/pubid/astm/scheme.rb +0 -55
- data/lib/pubid/bsi/identifiers/base.rb +0 -11
- data/lib/pubid/bsi/scheme.rb +0 -243
- data/lib/pubid/ccsds/scheme.rb +0 -57
- data/lib/pubid/cen_cenelec/scheme.rb +0 -164
- data/lib/pubid/cie/scheme.rb +0 -64
- data/lib/pubid/csa/scheme.rb +0 -44
- data/lib/pubid/etsi/scheme.rb +0 -42
- data/lib/pubid/export/data_class_exporter.rb +0 -59
- data/lib/pubid/export/ieee_exporter.rb +0 -78
- data/lib/pubid/export/itu_exporter.rb +0 -66
- data/lib/pubid/export/nist_exporter.rb +0 -64
- data/lib/pubid/export/registry_exporter.rb +0 -90
- data/lib/pubid/export/scheme_exporter.rb +0 -70
- data/lib/pubid/identifier_registry.rb +0 -198
- data/lib/pubid/idf/scheme.rb +0 -61
- data/lib/pubid/iec/scheme.rb +0 -71
- data/lib/pubid/ieee/scheme.rb +0 -90
- data/lib/pubid/iho/scheme.rb +0 -29
- data/lib/pubid/iso/identifiers/base.rb +0 -115
- data/lib/pubid/iso/scheme.rb +0 -187
- data/lib/pubid/itu/scheme.rb +0 -174
- data/lib/pubid/jcgm/scheme.rb +0 -60
- data/lib/pubid/jis/components/code.rb +0 -59
- data/lib/pubid/jis/identifiers/base.rb +0 -72
- data/lib/pubid/jis/scheme.rb +0 -49
- data/lib/pubid/nist/components/publisher.rb +0 -24
- data/lib/pubid/nist/scheme.rb +0 -199
- data/lib/pubid/oiml/scheme.rb +0 -46
- data/lib/pubid/plateau/scheme.rb +0 -45
- data/lib/pubid/rendering/base.rb +0 -73
- data/lib/pubid/rendering/common.rb +0 -211
- data/lib/pubid/rendering/format.rb +0 -25
- data/lib/pubid/sae/scheme.rb +0 -47
- data/lib/pubid/scheme.rb +0 -207
data/lib/pubid/nist/parser.rb
CHANGED
|
@@ -7,432 +7,26 @@ module Pubid
|
|
|
7
7
|
# Parser class for NIST identifiers
|
|
8
8
|
# Single Responsibility: Parsing NIST identifier syntax
|
|
9
9
|
class Parser < Parslet::Parser
|
|
10
|
-
# Class-level parse method with preprocessing
|
|
11
|
-
# Handles data quality normalization before parsing
|
|
12
|
-
# Named explicitly to avoid conflict with Parslet's built-in parse method
|
|
13
|
-
def self.class_parse_with_preprocessing(input)
|
|
14
|
-
# Apply legacy update_codes normalization first, before any other preprocessing
|
|
15
|
-
cleaned = Core::UpdateCodes.apply(input.to_s.strip, :nist)
|
|
16
|
-
|
|
17
|
-
# Fix lowercase publisher at start
|
|
18
|
-
cleaned = cleaned.sub(/^nbs\b/i, "NBS")
|
|
19
|
-
cleaned = cleaned.sub(/^nist\b/i, "NIST")
|
|
20
|
-
|
|
21
|
-
# Fix publisher+series concatenation: "NISTIR" → "NIST IR", "NBSIR" → "NBS IR"
|
|
22
|
-
# Must come after lowercase publisher fix to catch "nistir" → "NISTIR" → "NIST IR"
|
|
23
|
-
cleaned = cleaned.gsub(
|
|
24
|
-
/^(NBS|NIST)(IR|FIPS|GCR|HB|MONO|MP|NCSTAR|NSRDS)/i, '\1 \2'
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
# Fix lowercase series (ir, sp, tn, etc.)
|
|
28
|
-
cleaned = cleaned.sub(/\b(ir|sp|tn|hb|fips|ams|vts)\b/i, &:upcase)
|
|
29
|
-
|
|
30
|
-
# Normalize LC to LCIRC (single definition of truth)
|
|
31
|
-
# Pattern: "LC" followed by space/dot/end should become "LCIRC"
|
|
32
|
-
# But don't change if already "LCIRC"
|
|
33
|
-
cleaned = cleaned.gsub(/\bLC\b(?!IRC)/, "LCIRC")
|
|
34
|
-
|
|
35
|
-
# Combine "NBS LCIRC" with space into "NBS.LCIRC" ONLY when followed by supplement marker
|
|
36
|
-
# This allows the circ_supplement_identifier rule to match the pattern
|
|
37
|
-
# Only apply to supplement cases, not regular LCIRC identifiers
|
|
38
|
-
cleaned = cleaned.gsub(/\bNBS LCIRC\b(?=.*\b(?:supp?|sup\+|r\d+\/)\d)/,
|
|
39
|
-
"NBS.LCIRC")
|
|
40
|
-
|
|
41
|
-
# Convert MR format LCIRC supplements to space-separated format
|
|
42
|
-
# "NBS.LCIRC.145r11/1925" → "NBS LCIRC 145r11/1925" (convert series dot to space)
|
|
43
|
-
cleaned = cleaned.gsub(/\bNBS\.LCIRC\.(\d+r\d+\/\d{4})/,
|
|
44
|
-
"NBS LCIRC \\1")
|
|
45
|
-
# Also handle without year: "NBS.LCIRC.145r11" → "NBS LCIRC 145r11"
|
|
46
|
-
cleaned = cleaned.gsub(/\bNBS\.LCIRC\.(\d+r\d+)\b/, "NBS LCIRC \\1")
|
|
47
|
-
|
|
48
|
-
# Fix Roman numerals: "1011-I-2" → keep as is, but fix spaces: "1011-I-2 0" → "1011-I-2.0"
|
|
49
|
-
cleaned = cleaned.gsub(/([-\d]+[IVX]+[-\d]+)\s+(\d+)/, '\1.\2')
|
|
50
|
-
|
|
51
|
-
# Fix rev without space: "126rev2013" → "126 rev2013" (separate number from rev+year)
|
|
52
|
-
# BUT preserve edition+revision patterns: "e2rev1908" stays as-is
|
|
53
|
-
cleaned = cleaned.gsub(/(?<!e)(\d)(rev\d{4})/, '\1 \2')
|
|
54
|
-
|
|
55
|
-
# Fix LCIRC revision with slash and year: "145r6/1925" → "145 r6/1925"
|
|
56
|
-
# BUT NOT for LCIRC series (keep "NBS LCIRC 145r11/1925" as-is for parser)
|
|
57
|
-
# The circ_supplement_identifier rule expects "145r11" (no space)
|
|
58
|
-
unless cleaned.include?("LCIRC") || cleaned.include?("CIRC")
|
|
59
|
-
cleaned = cleaned.gsub(/(\d)(r\d+\/\d{4})/, '\1 \2')
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Fix LCIRC revision with just year (no slash): "1128r1995" → "1128 r1995"
|
|
63
|
-
# BUT preserve edition+revision patterns: "13e2rev1908" stays as-is
|
|
64
|
-
# AND preserve month abbreviations in patterns like "107-Mar1985" (ar1985 contains 'r')
|
|
65
|
-
# Use word boundary to ensure 'r' is standalone, not part of a month name
|
|
66
|
-
# AND preserve "rv" (revision year) patterns: "1013rv1953" stays as-is
|
|
67
|
-
cleaned = cleaned.gsub(/\b(r(?!v)\d{4})\b/, ' \1')
|
|
68
|
-
|
|
69
|
-
# Fix month in revision: "4743rJun1992" → "4743 rJun1992" (NEW)
|
|
70
|
-
cleaned = cleaned.gsub(/(\d)(r[A-Z][a-z]{2,8}\d{4})/, '\1 \2')
|
|
71
|
-
# REMOVED: Revision with 1-2 digits + lowercase letter preprocessing
|
|
72
|
-
# This is now handled by the more comprehensive fix at lines 131-142
|
|
73
|
-
# which keeps "22r1a" together (no space) for second_number pattern matching
|
|
74
|
-
|
|
75
|
-
# CRITICAL: Normalize lowercase letter suffix to uppercase
|
|
76
|
-
# Fix dash-letter pattern: "6529-a" → "6529-A" (FIXED - was incorrect)
|
|
77
|
-
# BUT preserve lowercase for NCSTAR series when letter is followed by volume (e.g., "1-1av1")
|
|
78
|
-
cleaned = cleaned.gsub(/(\d)-([a-z])$/) { "#{$1}-#{$2.upcase}" }
|
|
79
|
-
|
|
80
|
-
# Fix direct letter suffix (no dash): "378g" → "378G", "1000a" → "1000A"
|
|
81
|
-
# MUST come after dash pattern to avoid conflicts
|
|
82
|
-
# Fix letter suffix at end: "1011-A" → "1011A", "97-3b" → "97-3B"
|
|
83
|
-
# CRITICAL: Exclude r+digit pattern (e.g., "73-197r", "6945r") from this conversion
|
|
84
|
-
# These should remain as lowercase for edition pattern matching
|
|
85
|
-
# Only match single letter at end, not part of words like "index", "sec", etc.
|
|
86
|
-
cleaned = cleaned.gsub(/(\d)([a-z&&[^r]])$/) { "#{$1}#{$2.upcase}" }
|
|
87
|
-
# Also fix r+letter patterns (e.g., "22r1a" → "22r1A") separately
|
|
88
|
-
cleaned = cleaned.gsub(/(\d)(r)(\d+)([a-z])$/) do
|
|
89
|
-
"#{$1}#{$2}#{$3}#{$4.upcase}"
|
|
90
|
-
end
|
|
91
|
-
# NEW: Fix letter suffix before r (e.g., "53ar1" → "53Ar1")
|
|
92
|
-
# For patterns like NIST SP 800-53ar1 where letter is between number and revision
|
|
93
|
-
cleaned = cleaned.gsub(/(\d)([a-z])(r\d)/) { "#{$1}#{$2.upcase}#{$3}" }
|
|
94
|
-
# NOTE: Removed uppercase letter before r rule - it was breaking 800-56Ar2 parsing
|
|
95
|
-
# The parser should handle 56Ar2 as a single unit (letter suffix + revision)
|
|
96
|
-
|
|
97
|
-
# Fix letter suffix before volume: "1-2bv1" → "1-2Bv1" (MR format)
|
|
98
|
-
# BUT preserve "rv" (revision year) patterns: "1013rv1953" stays as-is
|
|
99
|
-
# Skip for NCSTAR to preserve lowercase letters (patterns like "1-1av1" should stay lowercase)
|
|
100
|
-
is_ncstar = cleaned.include?("NCSTAR")
|
|
101
|
-
unless is_ncstar
|
|
102
|
-
cleaned = cleaned.gsub(/(\d)([a-z&&[^r]])(v\d+)/) do
|
|
103
|
-
"#{$1}#{$2.upcase}#{$3}"
|
|
104
|
-
end
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
# Fix space before volume number: "80-2073 2" → "80-2073 v2" (Session 219)
|
|
108
|
-
# This handles NBS IR 80-2073 2 and NBS IR 80-2073 3 as volume identifiers
|
|
109
|
-
cleaned = cleaned.gsub(/(\d{2}-\d{4})\s+(\d)$/, '\1 v\2')
|
|
110
|
-
|
|
111
|
-
# Fix draft with number: "8270-draft2" → "8270 -draft 2" (Session 253)
|
|
112
|
-
# Space BEFORE dash AND after draft to separate it from report_number
|
|
113
|
-
cleaned = cleaned.gsub(/(\d)-draft(\d)/, '\1 -draft \2')
|
|
114
|
-
|
|
115
|
-
# NEW FIX 2: Draft without dash: "8270draft2" → "8270 -draft 2"
|
|
116
|
-
# More lenient pattern to catch missing dash before draft
|
|
117
|
-
cleaned = cleaned.gsub(/(\d)draft(\d)/, '\1 -draft \2')
|
|
118
|
-
|
|
119
|
-
# Fix supplement typo: "154suprev" → "154supprev" (Session 219)
|
|
120
|
-
cleaned = cleaned.gsub(/(\d)suprev/, '\1supprev')
|
|
121
|
-
|
|
122
|
-
# Fix letter suffix + revision before draft: "140Cr1-draft2" → "140C r1-draft2" (Session 221)
|
|
123
|
-
# Must be BEFORE general draft preprocessing at line 47
|
|
124
|
-
cleaned = cleaned.gsub(/(\d{2,})([A-Z])(r\d+)([-\s]draft\d*)/,
|
|
125
|
-
'\1\2 \3\4')
|
|
126
|
-
|
|
127
|
-
# Convert Roman numeral volumes to Arabic per NIST spec (page 7)
|
|
128
|
-
# "1011-I-2.0" → "1011 v1 ver2.0"
|
|
129
|
-
# "1011-II-1.0" → "1011 v2 ver1.0"
|
|
130
|
-
cleaned = cleaned.gsub(/(\d+)-([IVX]+)-(\d+(?:\.\d+)*)/) do
|
|
131
|
-
number = $1
|
|
132
|
-
roman = $2
|
|
133
|
-
version_part = $3
|
|
134
|
-
|
|
135
|
-
# Convert Roman to Arabic
|
|
136
|
-
arabic = roman_to_arabic(roman)
|
|
137
|
-
|
|
138
|
-
# Convert to volume+version format
|
|
139
|
-
"#{number} v#{arabic} ver#{version_part}"
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
# Fix LCIRC supplement with slash and year: "118supp3/1926" → "118 supp3/1926"
|
|
143
|
-
cleaned = cleaned.gsub(/(\d)(supp\d+\/\d{4})/, '\1 \2')
|
|
144
|
-
|
|
145
|
-
# Fix Pt pattern: "800-57Pt3r1" → "800-57 pt3 r1"
|
|
146
|
-
cleaned = cleaned.gsub(/(\d)Pt(\d+)(r\d+)/, '\1 pt\2 \3')
|
|
147
|
-
|
|
148
|
-
# Fix version patterns: "ver1e2006" → "ver1 e2006", "ver2v1" → "ver2 v1"
|
|
149
|
-
cleaned = cleaned.gsub(/(\d)ver(\d)/, '\1 ver \2')
|
|
150
|
-
cleaned = cleaned.gsub(/ver(\d+)e(\d{4})/, 'ver\1 e\2')
|
|
151
|
-
cleaned = cleaned.gsub(/ver(\d+)v(\d+)/, 'ver\1 v\2')
|
|
152
|
-
|
|
153
|
-
# Fix dotted version: separate from number "268v1.1" → "268 v1.1"
|
|
154
|
-
cleaned = cleaned.gsub(/(\d)(v\d+\.\d+)/, '\1 \2')
|
|
155
|
-
|
|
156
|
-
# CRITICAL: Now separate dotted versions from preceding digits: "268v1.1" → "268 v1.1" (NEW)
|
|
157
|
-
cleaned = cleaned.gsub(/(\d)(v\d+\.\d+)/, '\1 \2')
|
|
158
|
-
|
|
159
|
-
# NEW: Separate version from number AND convert spaces to dots in one step
|
|
160
|
-
cleaned = cleaned.gsub(/(\d)(v\d+)\s+(\d+)$/, '\1 \2.\3') # Two-part: "268v1 1" → "268 v1.1"
|
|
161
|
-
cleaned = cleaned.gsub(/(\d)(v\d+)\s+(\d+)\s+(\d+)$/, '\1 \2.\3.\4') # Three-part: "63v1 0 1" → "63 v1.0.1"
|
|
162
|
-
|
|
163
|
-
# Fix volume ranges: "535v2a-l" → "535 v2a-l", "535v2m-z" → "535 v2m-z"
|
|
164
|
-
cleaned = cleaned.gsub(/(\d)(v\d+[a-z]-[a-z])/, '\1 \2')
|
|
165
|
-
|
|
166
|
-
# NEW: Fix volume with uppercase letter: "48v3B" → "48 v3B" (Session 220)
|
|
167
|
-
cleaned = cleaned.gsub(/(\d)(v\d+[A-Z])/, '\1 \2')
|
|
168
|
-
|
|
169
|
-
# NEW: Fix volume ranges with uppercase: "v2A-L" → "v2a-l" (normalize to lowercase) (Session 220)
|
|
170
|
-
cleaned = cleaned.gsub(/(v\d+)([A-Z])-([A-Z])/, '\1\2-\3'.downcase)
|
|
171
|
-
|
|
172
|
-
# NEW: Fix edition with "ed." suffix: "2006ed." → "e2006" (V1 compatibility)
|
|
173
|
-
# Pattern appears at end of identifier: "NIST SP 260-162 2006ed."
|
|
174
|
-
cleaned = cleaned.gsub(/(\d{4})ed\./, 'e\1')
|
|
175
|
-
|
|
176
|
-
# CRITICAL: Fix revision attached to number BEFORE update patterns!
|
|
177
|
-
# "8115r1-upd" → "8115 r1-upd" so that later "r1-upd" → "r1 -upd" works
|
|
178
|
-
# But preserve r6/1925 format (don't add space before slash/year)
|
|
179
|
-
# And preserve 300-8r1/upd format (don't separate r1/upd)
|
|
180
|
-
# ENHANCED: Also handle r1a (revision with letter suffix) - "800-22r1a" → "800-22r1A"
|
|
181
|
-
# FIXED: When there's a letter suffix, keep together for second_number pattern
|
|
182
|
-
# CRITICAL: Use \d{1,2} instead of \d+ to limit revision to 1-2 digits, allowing [a-z] to match
|
|
183
|
-
# First rule: Match r+digit+letter (keep together)
|
|
184
|
-
cleaned = cleaned.gsub(/(\d+)(r\d{1,2})([a-z])(?=-|[A-Z]|$)/) do
|
|
185
|
-
num = $1
|
|
186
|
-
rev = $2
|
|
187
|
-
letter = $3
|
|
188
|
-
# Keep together when there's a letter suffix
|
|
189
|
-
"#{num}#{rev}#{letter.upcase}"
|
|
190
|
-
end
|
|
191
|
-
# Second rule: Match r+digit WITHOUT letter suffix
|
|
192
|
-
# CRITICAL: Use negative lookahead (?![a-zA-Z]) to avoid matching when there's a letter
|
|
193
|
-
# PRESERVE compact format (no space) when at end of string (NIST SP 800-53r4)
|
|
194
|
-
# ADD space only when followed by: dash+uppercase, uppercase letter, or /upd, /errata, /insert
|
|
195
|
-
cleaned = cleaned.gsub(/(\d+)(r\d{1,2})(?![a-zA-Z])(?=[A-Z]|-(?=[A-Z])|\/(?:upd|errata|insert))/) do
|
|
196
|
-
num = $1
|
|
197
|
-
rev = $2
|
|
198
|
-
# Add space when followed by dash+uppercase, uppercase, or update keyword
|
|
199
|
-
"#{num} #{rev}"
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
# Fix spaces in version/volume numbers: "v1 1" → "v1.1", "1011-I-2 0" → "1011-I-2.0"
|
|
203
|
-
# ENHANCED to handle multiple spaces: "v1 0 1" → "v1.0.1", "v1 0 2" → "v1.0.2"
|
|
204
|
-
# FIXED: Pattern must start with "v" or digit to avoid matching "rev 2013" as "v" + " 2013"
|
|
205
|
-
# CRITICAL: Added word boundary \b to prevent matching "v" within "rev"
|
|
206
|
-
# CRITICAL FIX: Use \b to ensure match starts at word boundary
|
|
207
|
-
cleaned = cleaned.gsub(/(\b(?:v|\d)[v\d]*[-A-Z]*)\s+(\d+)\s+(\d+)/, '\1.\2.\3') # Three parts
|
|
208
|
-
# CRITICAL FIX: Use \b to ensure match starts at word boundary
|
|
209
|
-
cleaned = cleaned.gsub(/(\b(?:v|\d)[v\d]*)\s+(\d+)/, '\1.\2') # Two parts
|
|
210
|
-
|
|
211
|
-
# Fix update patterns: ensure space before -upd or /upd (not just at end)
|
|
212
|
-
# Enhanced to handle optional digits after upd: -upd, -upd1, /upd, /upd1
|
|
213
|
-
cleaned = cleaned.gsub(/(\d+)-upd(\d*)/, '\1 -upd\2') # -upd or -upd1
|
|
214
|
-
cleaned = cleaned.gsub(/(\d+)\/upd(\d*)/, '\1 /upd\2') # /upd or /upd1
|
|
215
|
-
cleaned = cleaned.gsub(/([a-z]\d+)-upd/, '\1 -upd') # r1-upd → r1 -upd
|
|
216
|
-
cleaned = cleaned.gsub(/([a-z]\d+)\/upd/, '\1 /upd') # After revision: r1/upd → r1 /upd
|
|
217
|
-
|
|
218
|
-
# NEW FIX 3: MR format with letter suffix before update: "8286C-upd1" → "8286C -upd1"
|
|
219
|
-
# Must handle uppercase letters before -upd in MR format
|
|
220
|
-
cleaned = cleaned.gsub(/(\d+[A-Z])-upd(\d*)/, '\1 -upd\2') # Letter suffix + update
|
|
221
|
-
cleaned = cleaned.gsub(/(\d+[A-Z])\/upd(\d*)/, '\1 /upd\2') # Letter suffix + /upd variant
|
|
222
10
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
# Fix supplement patterns: ensure space before supplement (4th variant)
|
|
231
|
-
cleaned = cleaned.gsub(/(\d)(sup\d+)/, '\1 \2') # 100-2sup1 → 100-2 sup1
|
|
232
|
-
# Fix supplement patterns: ensure space before supplement (5th variant)
|
|
233
|
-
cleaned = cleaned.gsub(/(\d)(sup\d+\b)/, '\1 \2') # 100-2sup1 → 100-2 sup1
|
|
234
|
-
|
|
235
|
-
# Fix letter suffix + supplement: "378Gsup" → "378Gsupp" (NEW for LCIRC patterns)
|
|
236
|
-
# Normalize "sup" to "supp" for letter suffix patterns to match circ_supplement_identifier rule
|
|
237
|
-
cleaned = cleaned.gsub(/(\d+[A-Z])sup(\b)/, '\1supp\2') # 378Gsup → 378Gsupp
|
|
238
|
-
|
|
239
|
-
# Fix LCIRC supplement without letter suffix: "118sup12/1926" → "118supp12/1926"
|
|
240
|
-
# Normalize "sup" to "supp" for LCIRC patterns to match circ_supplement_identifier rule
|
|
241
|
-
cleaned = cleaned.gsub(/(\d+)sup(\d+\/\d{4})/, '\1supp\2') # 118sup12/1926 → 118supp12/1926
|
|
242
|
-
|
|
243
|
-
# REMOVED: Revision letter patterns that add space before revision with letter
|
|
244
|
-
# These conflicted with the fix at lines 131-142 which keeps "22r1a" together
|
|
245
|
-
# for second_number pattern matching. The comprehensive fix now handles:
|
|
246
|
-
# - "800-22r1a" → "800-22r1A" (kept together, uppercase letter)
|
|
247
|
-
# - "800-22r1" → "800-22 r1" (space added when no letter suffix)
|
|
248
|
-
|
|
249
|
-
# Fix number with letter suffix followed by standalone 'r': "56ar" → "56a r" (NEW)
|
|
250
|
-
cleaned = cleaned.gsub(/(\d[a-z])r\b/, '\1 r')
|
|
251
|
-
|
|
252
|
-
# Fix revision followed by language code: "r1es" → "r1 es", "r1pt" → "r1 pt" (NEW)
|
|
253
|
-
cleaned = cleaned.gsub(/(r\d+)(es|pt|chi|viet|port|esp)\b/, '\1 \2')
|
|
254
|
-
|
|
255
|
-
# Fix MR format translation codes: ".spa" → " spa", ".por" → " por", ".ind" → " ind" (NEW)
|
|
256
|
-
# Prevents 3-letter translation codes from being parsed as letter suffixes
|
|
257
|
-
# "NIST.SP.1262.spa" → "NIST.SP.1262 spa" (convert dot to space)
|
|
258
|
-
cleaned = cleaned.gsub(/^([A-Z]+)\.SP\.(\d+)\.([a-z]{2,4})$/,
|
|
259
|
-
'\1.SP.\2 \3')
|
|
260
|
-
cleaned = cleaned.gsub(/^([A-Z]+)\.([A-Z]+)\.(\d+)\.([a-z]{2,4})$/,
|
|
261
|
-
'\1.\2.\3 \4')
|
|
262
|
-
|
|
263
|
-
# ENHANCEMENT 1: Edition year normalization (-YYYY → eYYYY)
|
|
264
|
-
# Per NIST spec, trailing -YYYY should normalize to eYYYY format
|
|
265
|
-
# Pattern: number (optionally with non-e letter suffix) followed by dash and 4-digit year
|
|
266
|
-
# Examples: "330-2019" → "330e2019", "304a-2017" → "304Ae2017"
|
|
267
|
-
# Must NOT match existing edition patterns like "11e2-1915" (e2 is edition, -1915 is separate)
|
|
268
|
-
# Must be at end or before space to avoid breaking number-number patterns like "800-53"
|
|
269
|
-
# Negative lookbehind (?<![eE-]) prevents matching after e/E or dash (avoids e2-1915 and 105-1-1990)
|
|
270
|
-
# EXCLUSION: Do NOT convert -YYYY for HB series (handbooks) - preserve original format
|
|
271
|
-
# Example: "NBS HB 130-1979" should stay as "NBS HB 130-1979" (not convert to e1979)
|
|
272
|
-
# EXCLUSION: Do NOT convert -YYYY when preceded by "e\d+" (edition+year pattern like "44e2-1955")
|
|
273
|
-
# EXCLUSION: Only convert years in NBS (1901-1988) or NIST (1988-2099) range
|
|
274
|
-
# Numbers outside this range are part numbers, not edition years (e.g., SP 250-1039)
|
|
275
|
-
# Use a more specific pattern: only convert when NOT preceded by "e" + digits (edition)
|
|
276
|
-
# AND only convert when year is in valid range (1901-2099)
|
|
277
|
-
cleaned = cleaned.gsub(/(?<!e\d)(?<![eE-])(\d(?:[A-DF-Z]?))-(\d{4})(?=\s|$)/) do |match|
|
|
278
|
-
prefix = $1 # Number with optional letter
|
|
279
|
-
year = $2.to_i
|
|
280
|
-
# Only convert to edition format if year is in valid range
|
|
281
|
-
if year.between?(1901, 2099)
|
|
282
|
-
"#{prefix}e#{year}"
|
|
283
|
-
else
|
|
284
|
-
match # Keep dash format for part numbers (e.g., 250-1039)
|
|
285
|
-
end
|
|
286
|
-
end
|
|
287
|
-
# Revert the conversion for HB series to preserve -YYYY format
|
|
288
|
-
# Matches both "HB 130e1979" and "HB 105-1e1990" patterns
|
|
289
|
-
# Use [^:\s.]*? (exclude dots) to avoid consuming MR format dot separators
|
|
290
|
-
# This prevents "NIST.HB.135e2022" from being incorrectly reverted
|
|
291
|
-
cleaned = cleaned.gsub(/\b(HB|HB\s+)[^:\s.]*?(\d+)e(\d{4})(?=\s|$)/,
|
|
292
|
-
'\1\2-\3')
|
|
293
|
-
# Revert the conversion for OWMWP series to preserve date format MM-DD-YYYY
|
|
294
|
-
# OWMWP uses date as the number: "06-13-2018" (not an edition)
|
|
295
|
-
# Pattern: "OWMWP 06-13e2018" → "OWMWP 06-13-2018"
|
|
296
|
-
cleaned = cleaned.gsub(
|
|
297
|
-
/\b(OWMWP|OWMWP\s*)[^:\s]*?(\d{2})-(\d{2})e(\d{4})(?=\s|$)/, '\1\2-\3-\4'
|
|
298
|
-
)
|
|
299
|
-
# Revert the conversion for RPT series to preserve year range format YYYY-YYYY
|
|
300
|
-
# Report series uses year ranges as the number: "1946-1947" (not an edition)
|
|
301
|
-
# Pattern: "RPT 1946e1947" → "RPT 1946-1947"
|
|
302
|
-
# Note: This must check that first year < second year (forward range)
|
|
303
|
-
cleaned = cleaned.gsub(/\b(RPT|RPT\s*)([^:\s]*?)(\d{4})e(\d{4})(?=\s|$)/) do |match|
|
|
304
|
-
prefix = $1 # "RPT" or "RPT "
|
|
305
|
-
separator = $2 # "." or "" or other non-colon, non-space chars
|
|
306
|
-
first_year = $3.to_i
|
|
307
|
-
second_year = $4.to_i
|
|
308
|
-
# Only revert if first < second (year range like 1946-1947)
|
|
309
|
-
if first_year < second_year
|
|
310
|
-
"#{prefix}#{separator}#{first_year}-#{second_year}"
|
|
311
|
-
else
|
|
312
|
-
match # Keep e format for editions like e2018e2019
|
|
313
|
-
end
|
|
314
|
-
end
|
|
315
|
-
|
|
316
|
-
# ENHANCEMENT 2: Version normalization (v1.1 → ver1.1, Ver. 2.0 → ver2.0)
|
|
317
|
-
# Normalize short v format to verbose ver format per NIST spec
|
|
318
|
-
# Already handled in version rule, but normalize in preprocessing for consistency
|
|
319
|
-
|
|
320
|
-
# CRITICAL: MR format version normalization must come BEFORE general v normalization
|
|
321
|
-
# Pattern: "NIST.SP.500-281-v1.0" → "NIST.SP.500-281.ver1.0"
|
|
322
|
-
# This allows report_number to match "500-281" and version rule to match ".ver1.0"
|
|
323
|
-
cleaned = cleaned.gsub(/-v(\d+\.\d+)/, '.ver\1')
|
|
324
|
-
|
|
325
|
-
# Handle Ver. with period: "Ver. 2.0" → "ver2.0" (remove period and space)
|
|
326
|
-
cleaned = cleaned.gsub(/\bVer\.\s+(\d+(?:\.\d+)*)/, 'ver\1')
|
|
327
|
-
# Handle verbose "v" to "ver": "v1.1" → "ver1.1" (only with dots - versions have dots)
|
|
328
|
-
cleaned = cleaned.gsub(/\bv(\d+\.\d+(?:\.\d+)*)/, 'ver\1')
|
|
329
|
-
|
|
330
|
-
# Fix uppercase P for part: "428P1" → "428 p1", "647P2" → "647 p2" (NEW)
|
|
331
|
-
cleaned = cleaned.gsub(/(\d)P(\d)/, '\1 p\2')
|
|
332
|
-
|
|
333
|
-
# Normalize part notation: "p1" → "pt1", "n1" → "pt1" for consistency
|
|
334
|
-
# This handles patterns like "61p1" → "61pt1" and "467n1" → "467pt1"
|
|
335
|
-
# MUST come AFTER uppercase P normalization
|
|
336
|
-
# EXCLUDE pattern: {number}p{digit}{4-digit-year} like "28p11969" (part + year, not part notation)
|
|
337
|
-
# Use negative lookahead to avoid matching when p/n + digit is followed by exactly 4 digits (year)
|
|
338
|
-
cleaned = cleaned.gsub(/\b([pn])(\d+)(?!\d{4}\b)/, 'pt\2')
|
|
339
|
-
|
|
340
|
-
# Fix complex part patterns in MR format: ensure space before part
|
|
341
|
-
cleaned = cleaned.gsub(/(\d)([pP]\d+)/, '\1 \2') # .467p1adde1 → .467 p1adde1, 800-57p1 → 800-57 p1
|
|
342
|
-
|
|
343
|
-
# Fix CRPL-F series: ensure space after series (e.g., "CRPL-F-B150" → "CRPL-F-B 150")
|
|
344
|
-
cleaned = cleaned.gsub(/(NBS CRPL-F-[AB])(\d)/, '\1 \2')
|
|
345
|
-
cleaned = cleaned.gsub(/(CRPL-F-[AB])(\d)/, '\1 \2')
|
|
346
|
-
|
|
347
|
-
# Extract volume from number: "17-917v3" → "17-917 v3", "1-1v1" → "1-1 v1"
|
|
348
|
-
# Pattern: digits-digits followed by v and digits (GCR, NCSTAR patterns)
|
|
349
|
-
# MUST be specific to avoid breaking existing "v1.1" patterns
|
|
350
|
-
cleaned = cleaned.gsub(/(\d+-\d+)(v\d+)(?![.\d])/, '\1 \2') # Negative lookahead for dots
|
|
351
|
-
|
|
352
|
-
# pd_suffix rule handles " 2pd" directly (space >> digits >> str("pd"))
|
|
353
|
-
# No preprocessing needed - adding space before "pd" breaks the parser
|
|
354
|
-
|
|
355
|
-
# Fix "Suppl" with space: "955 Suppl" → "955Suppl"
|
|
356
|
-
cleaned = cleaned.gsub(/(\d+)\s+Suppl\b/, '\1Suppl')
|
|
357
|
-
|
|
358
|
-
# Fix verbose "Version" format: " Version 2" → " ver 2"
|
|
359
|
-
cleaned = cleaned.gsub(/\s+Version\s+(\d+)/, ' ver \1')
|
|
360
|
-
|
|
361
|
-
# Fix verbose "Revision" format: " Revision (r)" → " r"
|
|
362
|
-
cleaned = cleaned.gsub(/\s+Revision\s+\(r\)/, " r")
|
|
363
|
-
|
|
364
|
-
# Fix verbose "rev YYYY" format: "126 rev 2013" → "126r2013"
|
|
365
|
-
# Removes space between number and "rev", and converts to "r" prefix
|
|
366
|
-
# Handles patterns like "NIST SP 260-126 rev 2013" → "NIST SP 260-126r2013"
|
|
367
|
-
cleaned = cleaned.gsub(/(\d+)\s+rev\s+(\d{4})/, '\1r\2')
|
|
368
|
-
|
|
369
|
-
# Fix historical "report ;" format: "NBS report ; 8079" → "NBS RPT 8079"
|
|
370
|
-
# The semicolon and "report" (spelled out) are historical formats
|
|
371
|
-
cleaned = cleaned.gsub(/\breport\s*;\s*/, "RPT ")
|
|
372
|
-
cleaned = cleaned.gsub(/\breport\b/, "RPT")
|
|
373
|
-
|
|
374
|
-
# REMOVED: Incorrect dot preprocessing that treated dots as number separators
|
|
375
|
-
# This was semantically wrong - dots are PART separators in NIST!
|
|
376
|
-
# DELETE: cleaned = cleaned.gsub(/(\d{3,})\.(\d{1,4})(?=\s|$)/, '\1_\2')
|
|
377
|
-
|
|
378
|
-
# REMOVED: Incorrect space-to-underscore that treated as single number
|
|
379
|
-
# DELETE: cleaned = cleaned.gsub(/(\d{3,})\s+(\d{1,2})$/, '\1_\2')
|
|
380
|
-
|
|
381
|
-
# Detect format before parsing
|
|
382
|
-
format = detect_format(input.to_s)
|
|
383
|
-
|
|
384
|
-
# Use parslet parser instance
|
|
385
|
-
result = new.parse(cleaned)
|
|
11
|
+
# Class-level parse method with preprocessing.
|
|
12
|
+
# Delegates all string normalization to Nist::Preprocessor, then
|
|
13
|
+
# feeds the cleaned string to the Parslet grammar and stamps the
|
|
14
|
+
# detected format onto the parse tree.
|
|
15
|
+
def self.class_parse_with_preprocessing(input)
|
|
16
|
+
result = Preprocessor.new(input).call
|
|
17
|
+
parsed = new.parse(result.cleaned)
|
|
386
18
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
# For array results, merge all hashes into one
|
|
392
|
-
# This handles cases where identifier rule returns multiple components (e.g., compound_series + edition)
|
|
393
|
-
merged = result.inject({}) do |acc, hash|
|
|
19
|
+
if parsed.is_a?(Hash)
|
|
20
|
+
parsed.merge(parsed_format: result.format)
|
|
21
|
+
elsif parsed.is_a?(Array)
|
|
22
|
+
merged = parsed.each_with_object({}) do |hash, acc|
|
|
394
23
|
next acc unless hash.is_a?(Hash)
|
|
395
24
|
|
|
396
|
-
acc.merge(hash)
|
|
25
|
+
acc.merge!(hash)
|
|
397
26
|
end
|
|
398
|
-
merged.merge(parsed_format: format)
|
|
399
|
-
else
|
|
400
|
-
result
|
|
401
|
-
end
|
|
402
|
-
end
|
|
403
|
-
|
|
404
|
-
# Detect format from input string
|
|
405
|
-
# :mr if contains dots (machine-readable: NIST.SP.800-53)
|
|
406
|
-
# :short otherwise (default: NIST SP 800-53)
|
|
407
|
-
def self.detect_format(input)
|
|
408
|
-
# Check if it has dot separators (MR format pattern)
|
|
409
|
-
# Patterns include:
|
|
410
|
-
# - "NIST.SP.800-53" (publisher.series.number)
|
|
411
|
-
# - "FIPS.46e1977" (series.numberWithEdition)
|
|
412
|
-
# - "NBS.HB.28pt1e1969" (publisher.series.part.edition)
|
|
413
|
-
# Key indicator: dots between components instead of spaces
|
|
414
|
-
if input.include?(".") && !input.match?(/\s/)
|
|
415
|
-
:mr
|
|
27
|
+
merged.merge(parsed_format: result.format)
|
|
416
28
|
else
|
|
417
|
-
|
|
418
|
-
end
|
|
419
|
-
end
|
|
420
|
-
|
|
421
|
-
# Convert Roman numerals to Arabic numbers
|
|
422
|
-
# I→1, II→2, III→3, IV→4, V→5, VI→6, VII→7, VIII→8, IX→9, X→10
|
|
423
|
-
def self.roman_to_arabic(roman)
|
|
424
|
-
case roman
|
|
425
|
-
when "I" then "1"
|
|
426
|
-
when "II" then "2"
|
|
427
|
-
when "III" then "3"
|
|
428
|
-
when "IV" then "4"
|
|
429
|
-
when "V" then "5"
|
|
430
|
-
when "VI" then "6"
|
|
431
|
-
when "VII" then "7"
|
|
432
|
-
when "VIII" then "8"
|
|
433
|
-
when "IX" then "9"
|
|
434
|
-
when "X" then "10"
|
|
435
|
-
else roman # Fallback for unexpected patterns
|
|
29
|
+
parsed
|
|
436
30
|
end
|
|
437
31
|
end
|
|
438
32
|
|
|
@@ -622,6 +216,14 @@ module Pubid
|
|
|
622
216
|
# Note: Preprocessing converts content inside parentheses to uppercase
|
|
623
217
|
# Use specific patterns to avoid consuming other parenthetical content
|
|
624
218
|
(digits.as(:number) >> str("(") >> (str("SP") | str("PT") | str("ES")).as(:language_code) >> str(")")) |
|
|
219
|
+
# Number with letter suffix followed by revision (e.g., "8278Ar1", "256Ar1930")
|
|
220
|
+
# CRITICAL: Must come BEFORE digits_with_suffix because number_suffix's
|
|
221
|
+
# str("r").absent? guard rejects any letter followed by 'r' (would
|
|
222
|
+
# otherwise drop the letter and parse "256" + "Ar1930" as garbage).
|
|
223
|
+
(digits.as(:number) >>
|
|
224
|
+
upper_letter.as(:letter_suffix) >>
|
|
225
|
+
str("r") >>
|
|
226
|
+
digits.as(:revision_id)).as(:number_with_letter_revision) |
|
|
625
227
|
# Regular number with optional suffix (original) - includes letters like "A"
|
|
626
228
|
digits_with_suffix
|
|
627
229
|
).as(:first_number)
|
|
@@ -634,6 +236,11 @@ module Pubid
|
|
|
634
236
|
# NEW: Exclude "draft" keyword
|
|
635
237
|
str("draft").absent? >>
|
|
636
238
|
(
|
|
239
|
+
# Trailing bare supplement marker on a compound second number
|
|
240
|
+
# (e.g. "800-53sup") so it isn't split into "53s" + "up". Builder
|
|
241
|
+
# strips the marker and sets supplement="" (canonical "sup").
|
|
242
|
+
(digits >> (str("supp") | str("sup")) >>
|
|
243
|
+
(digit.absent? >> letter.absent?)) |
|
|
637
244
|
# NEW: Revision pattern with U+letter suffix (e.g., "22r1Ua", "38Ua")
|
|
638
245
|
# MUST come BEFORE general letter suffix to avoid matching just "U" from "Ua"
|
|
639
246
|
(digits >> str("r") >> digits >> str("U") >> lower_letter) |
|
|
@@ -721,6 +328,22 @@ module Pubid
|
|
|
721
328
|
).as(:date)
|
|
722
329
|
end
|
|
723
330
|
|
|
331
|
+
# ISO date token (YYYY-MM-DD) for date-style identifiers
|
|
332
|
+
rule(:iso_date) do
|
|
333
|
+
(match("[0-9]").repeat(4, 4).as(:date_year) >> dash >>
|
|
334
|
+
match("[0-9]").repeat(2, 2).as(:date_month) >> dash >>
|
|
335
|
+
match("[0-9]").repeat(2, 2).as(:date_day))
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
# Date-style identifier with no series (e.g. "NIST 2022-04-15 001",
|
|
339
|
+
# "NIST.2022-04-15.001" — DOI 10.6028/NIST.2022-04-15.001)
|
|
340
|
+
rule(:dated_identifier) do
|
|
341
|
+
hash_prefix.maybe >>
|
|
342
|
+
publisher >> (space | dot) >>
|
|
343
|
+
iso_date.as(:dated_date) >> (space | dot) >>
|
|
344
|
+
match("[0-9]").repeat(1).as(:dated_seq)
|
|
345
|
+
end
|
|
346
|
+
|
|
724
347
|
# LEGACY EDITION PATTERNS (for backward compatibility during migration)
|
|
725
348
|
# These will be gradually replaced as we migrate to proper Edition/Date components
|
|
726
349
|
rule(:legacy_edition) do
|
|
@@ -757,6 +380,8 @@ module Pubid
|
|
|
757
380
|
rule(:report_number) do
|
|
758
381
|
first_number >>
|
|
759
382
|
(
|
|
383
|
+
# Underscore edition-year (space-form mirror of mr_identifier): "1648_2009"
|
|
384
|
+
(str("_") >> digits.as(:edition_year)) |
|
|
760
385
|
# Month abbreviation as edition (e.g., 107-Mar1985, 11-Jan1925)
|
|
761
386
|
# MUST BE FIRST to catch -MonthYear patterns before they're
|
|
762
387
|
# incorrectly parsed as other alternatives
|
|
@@ -776,6 +401,13 @@ module Pubid
|
|
|
776
401
|
(str("U") >> lower_letter.as(:letter_suffix_extra)) |
|
|
777
402
|
upper_letter
|
|
778
403
|
).as(:letter_suffix)).as(:letter_number) |
|
|
404
|
+
# Dash-SEPARATED letter suffix on a numbered part (e.g., 21-4-B,
|
|
405
|
+
# 173-1-B for FIPS, and the reparseable form of letter_number's own
|
|
406
|
+
# rendered output 200-30-B). The month_abbrev guard keeps FIPS date
|
|
407
|
+
# forms like 11-1-Sep1977 with the earlier patterns; the trailing
|
|
408
|
+
# guard restricts to a single clean letter so GCR -200-30B is unaffected.
|
|
409
|
+
(dash >> digits.as(:letter_base) >> dash >> month_abbrev.absent? >>
|
|
410
|
+
upper_letter.as(:letter_suffix) >> (letter | digit).absent?).as(:letter_number) |
|
|
779
411
|
# Edition dash-year pattern (e.g., -1979 for handbooks like "NBS HB 130-1979")
|
|
780
412
|
# Matches any 4-digit sequence - the builder decides if it's a year or second_number
|
|
781
413
|
(dash >> match("[0-9]").repeat(4,
|
|
@@ -875,8 +507,18 @@ module Pubid
|
|
|
875
507
|
(
|
|
876
508
|
digits.as(:update_number).maybe >>
|
|
877
509
|
(dash >>
|
|
878
|
-
|
|
879
|
-
|
|
510
|
+
(
|
|
511
|
+
# Real updates: 4-digit year + optional 2-digit month
|
|
512
|
+
# (e.g. /Upd1-2015, /Upd3-202102).
|
|
513
|
+
(match("[0-9]").repeat(4, 4).as(:update_year) >>
|
|
514
|
+
match("[0-9]").repeat(2, 2).as(:update_month)) |
|
|
515
|
+
# Fallback: capture the whole digit run as the year. Handles the
|
|
516
|
+
# unpadded CIRC/LCIRC supplement form pubid emits, where year and
|
|
517
|
+
# revision are fused without a 2-digit month boundary
|
|
518
|
+
# (e.g. /Upd1-19256 = 1925 + revision 6). Keeps these strings
|
|
519
|
+
# re-parseable so generated ids round-trip.
|
|
520
|
+
match("[0-9]").repeat(4).as(:update_year)
|
|
521
|
+
)
|
|
880
522
|
).maybe
|
|
881
523
|
).as(:update)
|
|
882
524
|
end
|
|
@@ -941,8 +583,8 @@ module Pubid
|
|
|
941
583
|
(str("(") >> match('\w').repeat(3, 3).as(:translation) >> str(")")) |
|
|
942
584
|
# Space-prefix format: " spa"
|
|
943
585
|
(space >> match('\w').repeat(3, 3).as(:translation)) |
|
|
944
|
-
# Dot-prefix format: ".spa" (machine-readable)
|
|
945
|
-
(dot >> match('\w').repeat(3, 3).as(:translation))
|
|
586
|
+
# Dot-prefix format: ".spa" (machine-readable), optional leading space: " .spa"
|
|
587
|
+
(space.maybe >> dot >> match('\w').repeat(3, 3).as(:translation))
|
|
946
588
|
end
|
|
947
589
|
|
|
948
590
|
# Public draft suffix - for patterns like 2pd, 3pd
|
|
@@ -1010,6 +652,7 @@ module Pubid
|
|
|
1010
652
|
# Try compound series first (longest match), then publisher + simple series
|
|
1011
653
|
rule(:identifier) do
|
|
1012
654
|
circ_supplement_identifier |
|
|
655
|
+
dated_identifier |
|
|
1013
656
|
mr_identifier |
|
|
1014
657
|
(
|
|
1015
658
|
# Compound series (includes publisher in series name)
|