pubid 1.15.19 → 2.0.0.pre.alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.adoc +2041 -53
- data/archived-gems/pubid-ccsds/update_codes.yaml +1 -0
- data/archived-gems/pubid-iec/stages.yaml +129 -0
- data/archived-gems/pubid-iec/update_codes.yaml +67 -0
- data/archived-gems/pubid-ieee/update_codes.yaml +104 -0
- data/archived-gems/pubid-iso/stages.yaml +106 -0
- data/archived-gems/pubid-iso/update_codes.yaml +4 -0
- data/archived-gems/pubid-itu/i18n.yaml +13 -0
- data/archived-gems/pubid-itu/series.yaml +42 -0
- data/archived-gems/pubid-nist/publishers.yaml +6 -0
- data/archived-gems/pubid-nist/series.yaml +121 -0
- data/archived-gems/pubid-nist/stages.yaml +16 -0
- data/archived-gems/pubid-nist/update_codes.yaml +93 -0
- data/archived-gems/pubid-plateau/update_codes.yaml +6 -0
- data/data/ccsds/update_codes.yaml +1 -0
- data/data/iec/update_codes.yaml +67 -0
- data/data/ieee/update_codes.yaml +104 -0
- data/data/iso/update_codes.yaml +21 -0
- data/data/nist/update_codes.yaml +89 -0
- data/data/plateau/update_codes.yaml +6 -0
- data/lib/pubid/amca/builder.rb +176 -0
- data/lib/pubid/amca/identifier.rb +57 -0
- data/lib/pubid/amca/identifiers/base.rb +64 -0
- data/lib/pubid/amca/identifiers/interpretation.rb +51 -0
- data/lib/pubid/amca/identifiers/publication.rb +47 -0
- data/lib/pubid/amca/identifiers/standard.rb +22 -0
- data/lib/pubid/amca/identifiers.rb +12 -0
- data/lib/pubid/amca/parser.rb +153 -0
- data/lib/pubid/amca/scheme.rb +16 -0
- data/lib/pubid/amca/single_identifier.rb +33 -0
- data/lib/pubid/amca/urn_generator.rb +50 -0
- data/lib/pubid/amca.rb +26 -0
- data/lib/pubid/ansi/builder.rb +52 -0
- data/lib/pubid/ansi/identifier.rb +55 -0
- data/lib/pubid/ansi/identifiers/american_national_standard.rb +12 -0
- data/lib/pubid/ansi/identifiers/standard.rb +16 -0
- data/lib/pubid/ansi/identifiers.rb +11 -0
- data/lib/pubid/ansi/parser.rb +91 -0
- data/lib/pubid/ansi/scheme.rb +15 -0
- data/lib/pubid/ansi/single_identifier.rb +45 -0
- data/lib/pubid/ansi/urn_generator.rb +76 -0
- data/lib/pubid/ansi.rb +27 -0
- data/lib/pubid/api/builder.rb +85 -0
- data/lib/pubid/api/components/code.rb +9 -0
- data/lib/pubid/api/identifier.rb +68 -0
- data/lib/pubid/api/identifiers/base.rb +24 -0
- data/lib/pubid/api/identifiers/bulletin.rb +15 -0
- data/lib/pubid/api/identifiers/continuous_operations_standard.rb +15 -0
- data/lib/pubid/api/identifiers/mpms.rb +44 -0
- data/lib/pubid/api/identifiers/publication.rb +15 -0
- data/lib/pubid/api/identifiers/recommended_practice.rb +15 -0
- data/lib/pubid/api/identifiers/specification.rb +15 -0
- data/lib/pubid/api/identifiers/standard.rb +15 -0
- data/lib/pubid/api/identifiers/technical_report.rb +15 -0
- data/lib/pubid/api/identifiers/typeless_standard.rb +27 -0
- data/lib/pubid/api/parser.rb +140 -0
- data/lib/pubid/api/scheme.rb +66 -0
- data/lib/pubid/api/single_identifier.rb +46 -0
- data/lib/pubid/api/urn_generator.rb +41 -0
- data/lib/pubid/api.rb +17 -0
- data/lib/pubid/ashrae/builder.rb +498 -0
- data/lib/pubid/ashrae/identifier.rb +57 -0
- data/lib/pubid/ashrae/identifiers/addenda_package.rb +46 -0
- data/lib/pubid/ashrae/identifiers/addendum.rb +55 -0
- data/lib/pubid/ashrae/identifiers/base.rb +23 -0
- data/lib/pubid/ashrae/identifiers/combined_addenda.rb +51 -0
- data/lib/pubid/ashrae/identifiers/errata.rb +40 -0
- data/lib/pubid/ashrae/identifiers/guideline.rb +38 -0
- data/lib/pubid/ashrae/identifiers/interpretation.rb +39 -0
- data/lib/pubid/ashrae/identifiers/standard.rb +38 -0
- data/lib/pubid/ashrae/identifiers.rb +16 -0
- data/lib/pubid/ashrae/parser.rb +724 -0
- data/lib/pubid/ashrae/scheme.rb +53 -0
- data/lib/pubid/ashrae/single_identifier.rb +23 -0
- data/lib/pubid/ashrae/supplement_identifier.rb +23 -0
- data/lib/pubid/ashrae/urn_generator.rb +59 -0
- data/lib/pubid/ashrae.rb +21 -0
- data/lib/pubid/asme/builder.rb +153 -0
- data/lib/pubid/asme/components/code.rb +18 -0
- data/lib/pubid/asme/identifier.rb +61 -0
- data/lib/pubid/asme/identifiers/base.rb +70 -0
- data/lib/pubid/asme/identifiers/standard.rb +12 -0
- data/lib/pubid/asme/identifiers.rb +10 -0
- data/lib/pubid/asme/parser.rb +308 -0
- data/lib/pubid/asme/scheme.rb +37 -0
- data/lib/pubid/asme/single_identifier.rb +29 -0
- data/lib/pubid/asme/urn_generator.rb +133 -0
- data/lib/pubid/asme.rb +21 -0
- data/lib/pubid/astm/builder.rb +159 -0
- data/lib/pubid/astm/components/code.rb +33 -0
- data/lib/pubid/astm/identifier.rb +92 -0
- data/lib/pubid/astm/identifiers/adjunct.rb +21 -0
- data/lib/pubid/astm/identifiers/base.rb +13 -0
- data/lib/pubid/astm/identifiers/data_series.rb +25 -0
- data/lib/pubid/astm/identifiers/iso_dual_published.rb +74 -0
- data/lib/pubid/astm/identifiers/manual.rb +40 -0
- data/lib/pubid/astm/identifiers/monograph.rb +25 -0
- data/lib/pubid/astm/identifiers/research_report.rb +18 -0
- data/lib/pubid/astm/identifiers/standard.rb +52 -0
- data/lib/pubid/astm/identifiers/technical_report.rb +23 -0
- data/lib/pubid/astm/identifiers/work_in_progress.rb +21 -0
- data/lib/pubid/astm/parser.rb +244 -0
- data/lib/pubid/astm/scheme.rb +55 -0
- data/lib/pubid/astm/single_identifier.rb +25 -0
- data/lib/pubid/astm/urn_generator.rb +99 -0
- data/lib/pubid/astm.rb +38 -0
- data/lib/pubid/bsi/builder.rb +1483 -0
- data/lib/pubid/bsi/components/code.rb +11 -0
- data/lib/pubid/bsi/components/date.rb +11 -0
- data/lib/pubid/bsi/components/publisher.rb +11 -0
- data/lib/pubid/bsi/components/type.rb +11 -0
- data/lib/pubid/bsi/identifier.rb +87 -0
- data/lib/pubid/bsi/identifiers/addendum_document.rb +64 -0
- data/lib/pubid/bsi/identifiers/adopted_european_norm.rb +95 -0
- data/lib/pubid/bsi/identifiers/adopted_international_standard.rb +82 -0
- data/lib/pubid/bsi/identifiers/aerospace_standard.rb +118 -0
- data/lib/pubid/bsi/identifiers/amendment.rb +40 -0
- data/lib/pubid/bsi/identifiers/base.rb +11 -0
- data/lib/pubid/bsi/identifiers/british_industrial_practice.rb +27 -0
- data/lib/pubid/bsi/identifiers/british_standard.rb +33 -0
- data/lib/pubid/bsi/identifiers/bundled_identifier.rb +114 -0
- data/lib/pubid/bsi/identifiers/committee_document.rb +51 -0
- data/lib/pubid/bsi/identifiers/consolidated_identifier.rb +152 -0
- data/lib/pubid/bsi/identifiers/corrigendum.rb +28 -0
- data/lib/pubid/bsi/identifiers/detailed_specification.rb +69 -0
- data/lib/pubid/bsi/identifiers/disc.rb +56 -0
- data/lib/pubid/bsi/identifiers/draft_document.rb +71 -0
- data/lib/pubid/bsi/identifiers/electronic_book.rb +52 -0
- data/lib/pubid/bsi/identifiers/expert_commentary.rb +47 -0
- data/lib/pubid/bsi/identifiers/explanatory_supplement.rb +82 -0
- data/lib/pubid/bsi/identifiers/flex.rb +61 -0
- data/lib/pubid/bsi/identifiers/handbook.rb +39 -0
- data/lib/pubid/bsi/identifiers/index.rb +62 -0
- data/lib/pubid/bsi/identifiers/method.rb +76 -0
- data/lib/pubid/bsi/identifiers/national_annex.rb +73 -0
- data/lib/pubid/bsi/identifiers/practice_guide.rb +27 -0
- data/lib/pubid/bsi/identifiers/publicly_available_specification.rb +79 -0
- data/lib/pubid/bsi/identifiers/published_document.rb +79 -0
- data/lib/pubid/bsi/identifiers/section.rb +62 -0
- data/lib/pubid/bsi/identifiers/set.rb +46 -0
- data/lib/pubid/bsi/identifiers/standalone_amendment.rb +40 -0
- data/lib/pubid/bsi/identifiers/supplement_document.rb +51 -0
- data/lib/pubid/bsi/identifiers/supplementary_index.rb +81 -0
- data/lib/pubid/bsi/identifiers/technical_specification.rb +79 -0
- data/lib/pubid/bsi/identifiers/test_method.rb +67 -0
- data/lib/pubid/bsi/identifiers/value_added_publication.rb +52 -0
- data/lib/pubid/bsi/identifiers.rb +52 -0
- data/lib/pubid/bsi/model.rb +196 -0
- data/lib/pubid/bsi/parser.rb +659 -0
- data/lib/pubid/bsi/scheme.rb +243 -0
- data/lib/pubid/bsi/single_identifier.rb +129 -0
- data/lib/pubid/bsi/urn_generator.rb +84 -0
- data/lib/pubid/bsi.rb +32 -0
- data/lib/pubid/builder/base.rb +138 -0
- data/lib/pubid/bundled_identifier.rb +126 -0
- data/lib/pubid/ccsds/builder.rb +56 -0
- data/lib/pubid/ccsds/identifier.rb +84 -0
- data/lib/pubid/ccsds/identifiers/base.rb +89 -0
- data/lib/pubid/ccsds/identifiers/base_BASE_88929.rb +70 -0
- data/lib/pubid/ccsds/identifiers/corrigendum.rb +39 -0
- data/lib/pubid/ccsds/identifiers.rb +10 -0
- data/lib/pubid/ccsds/parser.rb +71 -0
- data/lib/pubid/ccsds/scheme.rb +57 -0
- data/lib/pubid/ccsds/single_identifier.rb +77 -0
- data/lib/pubid/ccsds/supplement_identifier.rb +33 -0
- data/lib/pubid/ccsds/urn_generator.rb +115 -0
- data/lib/pubid/ccsds.rb +21 -0
- data/lib/pubid/cen_cenelec/builder.rb +330 -0
- data/lib/pubid/cen_cenelec/identifier.rb +52 -0
- data/lib/pubid/cen_cenelec/identifiers/adopted_european_norm.rb +40 -0
- data/lib/pubid/cen_cenelec/identifiers/amendment.rb +29 -0
- data/lib/pubid/cen_cenelec/identifiers/base.rb +75 -0
- data/lib/pubid/cen_cenelec/identifiers/cen_report.rb +28 -0
- data/lib/pubid/cen_cenelec/identifiers/cen_workshop_agreement.rb +27 -0
- data/lib/pubid/cen_cenelec/identifiers/cenelec_harmonization_document.rb +28 -0
- data/lib/pubid/cen_cenelec/identifiers/consolidated_identifier.rb +61 -0
- data/lib/pubid/cen_cenelec/identifiers/corrigendum.rb +35 -0
- data/lib/pubid/cen_cenelec/identifiers/european_norm.rb +41 -0
- data/lib/pubid/cen_cenelec/identifiers/european_prestandard.rb +37 -0
- data/lib/pubid/cen_cenelec/identifiers/european_specification.rb +28 -0
- data/lib/pubid/cen_cenelec/identifiers/fragment.rb +22 -0
- data/lib/pubid/cen_cenelec/identifiers/guide.rb +27 -0
- data/lib/pubid/cen_cenelec/identifiers/harmonization_document.rb +27 -0
- data/lib/pubid/cen_cenelec/identifiers/technical_report.rb +27 -0
- data/lib/pubid/cen_cenelec/identifiers/technical_specification.rb +35 -0
- data/lib/pubid/cen_cenelec/identifiers.rb +32 -0
- data/lib/pubid/cen_cenelec/parser.rb +144 -0
- data/lib/pubid/cen_cenelec/scheme.rb +164 -0
- data/lib/pubid/cen_cenelec/single_identifier.rb +130 -0
- data/lib/pubid/cen_cenelec/supplement_identifier.rb +48 -0
- data/lib/pubid/cen_cenelec/urn_generator.rb +129 -0
- data/lib/pubid/cen_cenelec.rb +21 -0
- data/lib/pubid/cie/builder.rb +399 -0
- data/lib/pubid/cie/components/code.rb +72 -0
- data/lib/pubid/cie/components/language.rb +58 -0
- data/lib/pubid/cie/identifier.rb +71 -0
- data/lib/pubid/cie/identifiers/bundle.rb +20 -0
- data/lib/pubid/cie/identifiers/conference.rb +32 -0
- data/lib/pubid/cie/identifiers/corrigendum.rb +40 -0
- data/lib/pubid/cie/identifiers/dual_published.rb +41 -0
- data/lib/pubid/cie/identifiers/identical.rb +64 -0
- data/lib/pubid/cie/identifiers/joint_published.rb +52 -0
- data/lib/pubid/cie/identifiers/standard.rb +58 -0
- data/lib/pubid/cie/identifiers/supplement.rb +45 -0
- data/lib/pubid/cie/identifiers/tutorial_bundle.rb +20 -0
- data/lib/pubid/cie/identifiers.rb +17 -0
- data/lib/pubid/cie/parser.rb +347 -0
- data/lib/pubid/cie/scheme.rb +64 -0
- data/lib/pubid/cie/single_identifier.rb +30 -0
- data/lib/pubid/cie/supplement_identifier.rb +26 -0
- data/lib/pubid/cie/urn_generator.rb +123 -0
- data/lib/pubid/cie.rb +28 -0
- data/lib/pubid/components/code.rb +33 -0
- data/lib/pubid/components/date.rb +49 -0
- data/lib/pubid/components/edition.rb +32 -0
- data/lib/pubid/components/factory.rb +50 -0
- data/lib/pubid/components/language.rb +37 -0
- data/lib/pubid/components/locality.rb +10 -0
- data/lib/pubid/components/publisher.rb +36 -0
- data/lib/pubid/components/stage.rb +54 -0
- data/lib/pubid/components/type.rb +58 -0
- data/lib/pubid/components/typed_stage.rb +59 -0
- data/lib/pubid/components.rb +16 -0
- data/lib/pubid/core/pattern_doc_generator.rb +272 -0
- data/lib/pubid/core/update_codes.rb +77 -0
- data/lib/pubid/core.rb +8 -0
- data/lib/pubid/csa/builder.rb +671 -0
- data/lib/pubid/csa/components/code.rb +9 -0
- data/lib/pubid/csa/components.rb +9 -0
- data/lib/pubid/csa/composite_identifier.rb +27 -0
- data/lib/pubid/csa/identifier.rb +513 -0
- data/lib/pubid/csa/identifiers/base.rb +133 -0
- data/lib/pubid/csa/identifiers/bundled.rb +125 -0
- data/lib/pubid/csa/identifiers/canadian_adopted.rb +82 -0
- data/lib/pubid/csa/identifiers/cec.rb +129 -0
- data/lib/pubid/csa/identifiers/combined.rb +130 -0
- data/lib/pubid/csa/identifiers/csa_adopted.rb +78 -0
- data/lib/pubid/csa/identifiers/package.rb +65 -0
- data/lib/pubid/csa/identifiers/series.rb +127 -0
- data/lib/pubid/csa/identifiers/standard.rb +10 -0
- data/lib/pubid/csa/identifiers.rb +17 -0
- data/lib/pubid/csa/parser.rb +445 -0
- data/lib/pubid/csa/scheme.rb +44 -0
- data/lib/pubid/csa/single_identifier.rb +30 -0
- data/lib/pubid/csa/urn_generator.rb +80 -0
- data/lib/pubid/csa/wrapper_identifier.rb +31 -0
- data/lib/pubid/csa.rb +25 -0
- data/lib/pubid/etsi/builder.rb +133 -0
- data/lib/pubid/etsi/components/code.rb +42 -0
- data/lib/pubid/etsi/components/version.rb +37 -0
- data/lib/pubid/etsi/components.rb +10 -0
- data/lib/pubid/etsi/identifier.rb +57 -0
- data/lib/pubid/etsi/identifiers/amendment.rb +15 -0
- data/lib/pubid/etsi/identifiers/base.rb +38 -0
- data/lib/pubid/etsi/identifiers/corrigendum.rb +15 -0
- data/lib/pubid/etsi/identifiers/etsi_standard.rb +19 -0
- data/lib/pubid/etsi/identifiers/supplement_identifier.rb +91 -0
- data/lib/pubid/etsi/identifiers.rb +14 -0
- data/lib/pubid/etsi/parser.rb +133 -0
- data/lib/pubid/etsi/scheme.rb +42 -0
- data/lib/pubid/etsi/urn_generator.rb +76 -0
- data/lib/pubid/etsi.rb +21 -0
- data/lib/pubid/export/auditor.rb +89 -0
- data/lib/pubid/export/data_class_exporter.rb +59 -0
- data/lib/pubid/export/exporter.rb +74 -0
- data/lib/pubid/export/flavor_exporter.rb +402 -0
- data/lib/pubid/export/ieee_exporter.rb +78 -0
- data/lib/pubid/export/itu_exporter.rb +66 -0
- data/lib/pubid/export/nist_exporter.rb +64 -0
- data/lib/pubid/export/registry_exporter.rb +90 -0
- data/lib/pubid/export/result.rb +97 -0
- data/lib/pubid/export/scheme_exporter.rb +70 -0
- data/lib/pubid/export.rb +18 -0
- data/lib/pubid/format_detector.rb +16 -0
- data/lib/pubid/format_registry.rb +42 -0
- data/lib/pubid/identifier.rb +242 -0
- data/lib/pubid/identifier_metadata.rb +148 -0
- data/lib/pubid/identifier_registry.rb +198 -0
- data/lib/pubid/idf/builder.rb +82 -0
- data/lib/pubid/idf/identifier.rb +129 -0
- data/lib/pubid/idf/identifiers/amendment.rb +27 -0
- data/lib/pubid/idf/identifiers/corrigendum.rb +27 -0
- data/lib/pubid/idf/identifiers/international_standard.rb +123 -0
- data/lib/pubid/idf/identifiers/reviewed_method.rb +100 -0
- data/lib/pubid/idf/identifiers.rb +13 -0
- data/lib/pubid/idf/parser.rb +143 -0
- data/lib/pubid/idf/scheme.rb +61 -0
- data/lib/pubid/idf/single_identifier.rb +19 -0
- data/lib/pubid/idf/supplement_identifier.rb +43 -0
- data/lib/pubid/idf/urn_generator.rb +84 -0
- data/lib/pubid/idf.rb +25 -0
- data/lib/pubid/iec/builder.rb +458 -0
- data/lib/pubid/iec/components/code.rb +60 -0
- data/lib/pubid/iec/components/consolidated_amendment.rb +59 -0
- data/lib/pubid/iec/components/publisher.rb +36 -0
- data/lib/pubid/iec/components/sheet.rb +32 -0
- data/lib/pubid/iec/components/trf_info.rb +38 -0
- data/lib/pubid/iec/components/vap_suffix.rb +41 -0
- data/lib/pubid/iec/identifier.rb +256 -0
- data/lib/pubid/iec/identifiers/amendment.rb +94 -0
- data/lib/pubid/iec/identifiers/base.rb +82 -0
- data/lib/pubid/iec/identifiers/component_specification.rb +39 -0
- data/lib/pubid/iec/identifiers/conformity_assessment.rb +39 -0
- data/lib/pubid/iec/identifiers/consolidated_identifier.rb +82 -0
- data/lib/pubid/iec/identifiers/corrigendum.rb +94 -0
- data/lib/pubid/iec/identifiers/fragment_identifier.rb +137 -0
- data/lib/pubid/iec/identifiers/guide.rb +104 -0
- data/lib/pubid/iec/identifiers/international_standard.rb +147 -0
- data/lib/pubid/iec/identifiers/interpretation_sheet.rb +104 -0
- data/lib/pubid/iec/identifiers/operational_document.rb +39 -0
- data/lib/pubid/iec/identifiers/publicly_available_specification.rb +101 -0
- data/lib/pubid/iec/identifiers/sheet_identifier.rb +62 -0
- data/lib/pubid/iec/identifiers/societal_technology_trend_report.rb +40 -0
- data/lib/pubid/iec/identifiers/systems_reference_document.rb +40 -0
- data/lib/pubid/iec/identifiers/technical_report.rb +132 -0
- data/lib/pubid/iec/identifiers/technical_specification.rb +132 -0
- data/lib/pubid/iec/identifiers/technology_report.rb +39 -0
- data/lib/pubid/iec/identifiers/test_report_form.rb +78 -0
- data/lib/pubid/iec/identifiers/vap_identifier.rb +73 -0
- data/lib/pubid/iec/identifiers/white_paper.rb +39 -0
- data/lib/pubid/iec/identifiers/working_document.rb +96 -0
- data/lib/pubid/iec/parser.rb +417 -0
- data/lib/pubid/iec/rendering_style.rb +113 -0
- data/lib/pubid/iec/scheme.rb +71 -0
- data/lib/pubid/iec/single_identifier.rb +80 -0
- data/lib/pubid/iec/supplement_identifier.rb +161 -0
- data/lib/pubid/iec/urn_generator.rb +79 -0
- data/lib/pubid/iec/urn_parser.rb +90 -0
- data/lib/pubid/iec.rb +85 -0
- data/lib/pubid/ieee/aiee/builder.rb +71 -0
- data/lib/pubid/ieee/aiee/identifier.rb +105 -0
- data/lib/pubid/ieee/aiee/parser.rb +130 -0
- data/lib/pubid/ieee/aiee.rb +11 -0
- data/lib/pubid/ieee/builder.rb +1237 -0
- data/lib/pubid/ieee/components/code.rb +102 -0
- data/lib/pubid/ieee/components/draft.rb +93 -0
- data/lib/pubid/ieee/components/relationship.rb +157 -0
- data/lib/pubid/ieee/components/typed_stage.rb +100 -0
- data/lib/pubid/ieee/identifier.rb +54 -0
- data/lib/pubid/ieee/identifiers/adopted_standard.rb +33 -0
- data/lib/pubid/ieee/identifiers/base.rb +591 -0
- data/lib/pubid/ieee/identifiers/conformance_identifier.rb +35 -0
- data/lib/pubid/ieee/identifiers/corrigendum.rb +37 -0
- data/lib/pubid/ieee/identifiers/csa_dual_published.rb +51 -0
- data/lib/pubid/ieee/identifiers/dual_identifier.rb +18 -0
- data/lib/pubid/ieee/identifiers/dual_published.rb +28 -0
- data/lib/pubid/ieee/identifiers/iec_ieee_copublished.rb +27 -0
- data/lib/pubid/ieee/identifiers/interpretation_identifier.rb +34 -0
- data/lib/pubid/ieee/identifiers/joint_development.rb +172 -0
- data/lib/pubid/ieee/identifiers/multi_numbered_identifier.rb +51 -0
- data/lib/pubid/ieee/identifiers/nesc/base.rb +56 -0
- data/lib/pubid/ieee/identifiers/nesc/draft.rb +28 -0
- data/lib/pubid/ieee/identifiers/nesc/handbook.rb +32 -0
- data/lib/pubid/ieee/identifiers/nesc/redline.rb +26 -0
- data/lib/pubid/ieee/identifiers/nesc/standard.rb +26 -0
- data/lib/pubid/ieee/identifiers/nesc.rb +15 -0
- data/lib/pubid/ieee/identifiers/parenthetical_identifier.rb +20 -0
- data/lib/pubid/ieee/identifiers/project_draft_identifier.rb +26 -0
- data/lib/pubid/ieee/identifiers/redlined_standard.rb +33 -0
- data/lib/pubid/ieee/identifiers/si_standard.rb +73 -0
- data/lib/pubid/ieee/identifiers/standard.rb +41 -0
- data/lib/pubid/ieee/identifiers/supplement_identifier.rb +23 -0
- data/lib/pubid/ieee/identifiers.rb +33 -0
- data/lib/pubid/ieee/ire/builder.rb +61 -0
- data/lib/pubid/ieee/ire/identifier.rb +58 -0
- data/lib/pubid/ieee/ire/parser.rb +91 -0
- data/lib/pubid/ieee/ire.rb +11 -0
- data/lib/pubid/ieee/nesc/builder.rb +101 -0
- data/lib/pubid/ieee/nesc/parser.rb +154 -0
- data/lib/pubid/ieee/nesc.rb +10 -0
- data/lib/pubid/ieee/parser.rb +1226 -0
- data/lib/pubid/ieee/scheme.rb +90 -0
- data/lib/pubid/ieee/typed_stages.rb +172 -0
- data/lib/pubid/ieee/urn_generator.rb +188 -0
- data/lib/pubid/ieee.rb +32 -0
- data/lib/pubid/ieee_debug.rb +31 -0
- data/lib/pubid/iho/builder.rb +37 -0
- data/lib/pubid/iho/identifier.rb +61 -0
- data/lib/pubid/iho/identifiers/base.rb +41 -0
- data/lib/pubid/iho/identifiers/bibliographic.rb +16 -0
- data/lib/pubid/iho/identifiers/circular_letter.rb +15 -0
- data/lib/pubid/iho/identifiers/miscellaneous.rb +16 -0
- data/lib/pubid/iho/identifiers/publication.rb +15 -0
- data/lib/pubid/iho/identifiers/standard.rb +15 -0
- data/lib/pubid/iho/identifiers.rb +14 -0
- data/lib/pubid/iho/parser.rb +68 -0
- data/lib/pubid/iho/scheme.rb +29 -0
- data/lib/pubid/iho/urn_generator.rb +29 -0
- data/lib/pubid/iho.rb +21 -0
- data/lib/pubid/iso/builder.rb +309 -0
- data/lib/pubid/iso/bundled_identifier.rb +85 -0
- data/lib/pubid/iso/combined_identifier.rb +22 -0
- data/lib/pubid/iso/components/code.rb +36 -0
- data/lib/pubid/iso/components/publisher.rb +60 -0
- data/lib/pubid/iso/components.rb +12 -0
- data/lib/pubid/iso/format_resolver.rb +45 -0
- data/lib/pubid/iso/identifier.rb +330 -0
- data/lib/pubid/iso/identifiers/addendum.rb +104 -0
- data/lib/pubid/iso/identifiers/amendment.rb +128 -0
- data/lib/pubid/iso/identifiers/base.rb +115 -0
- data/lib/pubid/iso/identifiers/corrigendum.rb +108 -0
- data/lib/pubid/iso/identifiers/data.rb +76 -0
- data/lib/pubid/iso/identifiers/directives.rb +59 -0
- data/lib/pubid/iso/identifiers/directives_supplement.rb +119 -0
- data/lib/pubid/iso/identifiers/extract.rb +30 -0
- data/lib/pubid/iso/identifiers/guide.rb +100 -0
- data/lib/pubid/iso/identifiers/international_standard.rb +168 -0
- data/lib/pubid/iso/identifiers/international_standardized_profile.rb +94 -0
- data/lib/pubid/iso/identifiers/international_workshop_agreement.rb +89 -0
- data/lib/pubid/iso/identifiers/pas.rb +93 -0
- data/lib/pubid/iso/identifiers/recommendation.rb +45 -0
- data/lib/pubid/iso/identifiers/supplement.rb +87 -0
- data/lib/pubid/iso/identifiers/tc_document.rb +108 -0
- data/lib/pubid/iso/identifiers/technical_report.rb +103 -0
- data/lib/pubid/iso/identifiers/technical_specification.rb +102 -0
- data/lib/pubid/iso/identifiers/technology_trends_assessments.rb +95 -0
- data/lib/pubid/iso/identifiers.rb +33 -0
- data/lib/pubid/iso/parser.rb +512 -0
- data/lib/pubid/iso/rendering_style.rb +120 -0
- data/lib/pubid/iso/scheme.rb +193 -0
- data/lib/pubid/iso/single_identifier.rb +64 -0
- data/lib/pubid/iso/supplement_identifier.rb +27 -0
- data/lib/pubid/iso/urn_generator.rb +426 -0
- data/lib/pubid/iso/urn_parser.rb +437 -0
- data/lib/pubid/iso/utilities.rb +86 -0
- data/lib/pubid/iso.rb +50 -0
- data/lib/pubid/itu/builder.rb +171 -0
- data/lib/pubid/itu/components/code.rb +39 -0
- data/lib/pubid/itu/components/sector.rb +35 -0
- data/lib/pubid/itu/components/series.rb +29 -0
- data/lib/pubid/itu/i18n.rb +9 -0
- data/lib/pubid/itu/i18n.yaml +30 -0
- data/lib/pubid/itu/identifier.rb +118 -0
- data/lib/pubid/itu/identifiers/amendment.rb +43 -0
- data/lib/pubid/itu/identifiers/annex.rb +74 -0
- data/lib/pubid/itu/identifiers/base.rb +154 -0
- data/lib/pubid/itu/identifiers/combined_identifier.rb +47 -0
- data/lib/pubid/itu/identifiers/corrigendum.rb +44 -0
- data/lib/pubid/itu/identifiers/recommendation.rb +16 -0
- data/lib/pubid/itu/identifiers/special_publication.rb +31 -0
- data/lib/pubid/itu/identifiers/supplement.rb +46 -0
- data/lib/pubid/itu/identifiers.rb +16 -0
- data/lib/pubid/itu/model.rb +111 -0
- data/lib/pubid/itu/parser.rb +225 -0
- data/lib/pubid/itu/scheme.rb +174 -0
- data/lib/pubid/itu/urn_generator.rb +105 -0
- data/lib/pubid/itu.rb +22 -0
- data/lib/pubid/jcgm/builder.rb +88 -0
- data/lib/pubid/jcgm/components/publisher.rb +20 -0
- data/lib/pubid/jcgm/components.rb +9 -0
- data/lib/pubid/jcgm/identifier.rb +54 -0
- data/lib/pubid/jcgm/identifiers/amendment.rb +35 -0
- data/lib/pubid/jcgm/identifiers/guide.rb +21 -0
- data/lib/pubid/jcgm/identifiers/gum_guide.rb +51 -0
- data/lib/pubid/jcgm/identifiers.rb +11 -0
- data/lib/pubid/jcgm/parser.rb +84 -0
- data/lib/pubid/jcgm/scheme.rb +60 -0
- data/lib/pubid/jcgm/single_identifier.rb +48 -0
- data/lib/pubid/jcgm/supplement_identifier.rb +16 -0
- data/lib/pubid/jcgm/urn_generator.rb +110 -0
- data/lib/pubid/jcgm.rb +31 -0
- data/lib/pubid/jis/builder.rb +124 -0
- data/lib/pubid/jis/components/code.rb +59 -0
- data/lib/pubid/jis/components.rb +9 -0
- data/lib/pubid/jis/identifier.rb +61 -0
- data/lib/pubid/jis/identifiers/amendment.rb +16 -0
- data/lib/pubid/jis/identifiers/base.rb +72 -0
- data/lib/pubid/jis/identifiers/explanation.rb +22 -0
- data/lib/pubid/jis/identifiers/japanese_industrial_standard.rb +16 -0
- data/lib/pubid/jis/identifiers/standard.rb +27 -0
- data/lib/pubid/jis/identifiers/technical_report.rb +31 -0
- data/lib/pubid/jis/identifiers/technical_specification.rb +31 -0
- data/lib/pubid/jis/identifiers.rb +17 -0
- data/lib/pubid/jis/parser.rb +109 -0
- data/lib/pubid/jis/scheme.rb +49 -0
- data/lib/pubid/jis/single_identifier.rb +37 -0
- data/lib/pubid/jis/supplement_identifier.rb +47 -0
- data/lib/pubid/jis/urn_generator.rb +25 -0
- data/lib/pubid/jis.rb +23 -0
- data/lib/pubid/lutaml/no_store_registration.rb +30 -0
- data/lib/pubid/nist/builder.rb +2269 -0
- data/lib/pubid/nist/components/code.rb +38 -0
- data/lib/pubid/nist/components/edition.rb +134 -0
- data/lib/pubid/nist/components/issue_number.rb +28 -0
- data/lib/pubid/nist/components/part.rb +77 -0
- data/lib/pubid/nist/components/publisher.rb +24 -0
- data/lib/pubid/nist/components/stage.rb +53 -0
- data/lib/pubid/nist/components/supplement.rb +188 -0
- data/lib/pubid/nist/components/translation.rb +42 -0
- data/lib/pubid/nist/components/update.rb +103 -0
- data/lib/pubid/nist/components/version.rb +35 -0
- data/lib/pubid/nist/components/volume.rb +32 -0
- data/lib/pubid/nist/components.rb +19 -0
- data/lib/pubid/nist/configuration.rb +77 -0
- data/lib/pubid/nist/identifier.rb +62 -0
- data/lib/pubid/nist/identifiers/base.rb +578 -0
- data/lib/pubid/nist/identifiers/circular.rb +68 -0
- data/lib/pubid/nist/identifiers/circular_supplement.rb +50 -0
- data/lib/pubid/nist/identifiers/commercial_standard.rb +41 -0
- data/lib/pubid/nist/identifiers/commercial_standard_emergency.rb +56 -0
- data/lib/pubid/nist/identifiers/commercial_standards_monthly.rb +56 -0
- data/lib/pubid/nist/identifiers/crpl_report.rb +132 -0
- data/lib/pubid/nist/identifiers/federal_information_processing_standards.rb +104 -0
- data/lib/pubid/nist/identifiers/grant_contractor_report.rb +35 -0
- data/lib/pubid/nist/identifiers/handbook.rb +50 -0
- data/lib/pubid/nist/identifiers/internal_report.rb +56 -0
- data/lib/pubid/nist/identifiers/letter_circular.rb +45 -0
- data/lib/pubid/nist/identifiers/miscellaneous_publication.rb +65 -0
- data/lib/pubid/nist/identifiers/monograph.rb +69 -0
- data/lib/pubid/nist/identifiers/ncstar.rb +41 -0
- data/lib/pubid/nist/identifiers/nsrds.rb +41 -0
- data/lib/pubid/nist/identifiers/owmwp.rb +35 -0
- data/lib/pubid/nist/identifiers/report.rb +67 -0
- data/lib/pubid/nist/identifiers/special_publication.rb +36 -0
- data/lib/pubid/nist/identifiers/technical_note.rb +90 -0
- data/lib/pubid/nist/identifiers.rb +33 -0
- data/lib/pubid/nist/parser.rb +1117 -0
- data/lib/pubid/nist/scheme.rb +199 -0
- data/lib/pubid/nist/supplement_identifier.rb +67 -0
- data/lib/pubid/nist/urn_generator.rb +133 -0
- data/lib/pubid/nist.rb +37 -0
- data/lib/pubid/oiml/builder.rb +189 -0
- data/lib/pubid/oiml/components/code.rb +20 -0
- data/lib/pubid/oiml/components.rb +9 -0
- data/lib/pubid/oiml/identifier.rb +61 -0
- data/lib/pubid/oiml/identifiers/amendment.rb +13 -0
- data/lib/pubid/oiml/identifiers/annex.rb +62 -0
- data/lib/pubid/oiml/identifiers/base.rb +36 -0
- data/lib/pubid/oiml/identifiers/basic_publication.rb +13 -0
- data/lib/pubid/oiml/identifiers/document.rb +13 -0
- data/lib/pubid/oiml/identifiers/expert_report.rb +13 -0
- data/lib/pubid/oiml/identifiers/guide.rb +13 -0
- data/lib/pubid/oiml/identifiers/recommendation.rb +13 -0
- data/lib/pubid/oiml/identifiers/seminar_report.rb +13 -0
- data/lib/pubid/oiml/identifiers/vocabulary.rb +13 -0
- data/lib/pubid/oiml/identifiers.rb +18 -0
- data/lib/pubid/oiml/parser.rb +173 -0
- data/lib/pubid/oiml/scheme.rb +46 -0
- data/lib/pubid/oiml/single_identifier.rb +90 -0
- data/lib/pubid/oiml/supplement_identifier.rb +43 -0
- data/lib/pubid/oiml/urn_generator.rb +64 -0
- data/lib/pubid/oiml.rb +26 -0
- data/lib/pubid/parser/common_parse_methods.rb +13 -0
- data/lib/pubid/parser/common_parse_rules.rb +56 -0
- data/lib/pubid/parser.rb +8 -0
- data/lib/pubid/parsers/base.rb +11 -0
- data/lib/pubid/parsers/mr_string.rb +93 -0
- data/lib/pubid/plateau/builder.rb +50 -0
- data/lib/pubid/plateau/identifier.rb +57 -0
- data/lib/pubid/plateau/identifiers/annex.rb +16 -0
- data/lib/pubid/plateau/identifiers/base.rb +51 -0
- data/lib/pubid/plateau/identifiers/handbook.rb +34 -0
- data/lib/pubid/plateau/identifiers/technical_report.rb +20 -0
- data/lib/pubid/plateau/identifiers.rb +12 -0
- data/lib/pubid/plateau/parser.rb +63 -0
- data/lib/pubid/plateau/scheme.rb +45 -0
- data/lib/pubid/plateau/supplement_identifier.rb +72 -0
- data/lib/pubid/plateau/urn_generator.rb +29 -0
- data/lib/pubid/plateau.rb +26 -0
- data/lib/pubid/renderers/base.rb +53 -0
- data/lib/pubid/renderers/directives_renderer.rb +61 -0
- data/lib/pubid/renderers/guide_renderer.rb +24 -0
- data/lib/pubid/renderers/human_readable.rb +70 -0
- data/lib/pubid/renderers/iwa_renderer.rb +20 -0
- data/lib/pubid/renderers/mr_string.rb +16 -0
- data/lib/pubid/renderers/supplement_renderer.rb +36 -0
- data/lib/pubid/renderers/urn.rb +11 -0
- data/lib/pubid/renderers.rb +14 -0
- data/lib/pubid/rendering/base.rb +73 -0
- data/lib/pubid/rendering/common.rb +211 -0
- data/lib/pubid/rendering/context.rb +159 -0
- data/lib/pubid/rendering/date.rb +27 -0
- data/lib/pubid/rendering/format.rb +25 -0
- data/lib/pubid/rendering/language.rb +21 -0
- data/lib/pubid/rendering/numbering.rb +24 -0
- data/lib/pubid/rendering/publisher.rb +25 -0
- data/lib/pubid/rendering/stage.rb +38 -0
- data/lib/pubid/rendering/supplement.rb +46 -0
- data/lib/pubid/rendering.rb +16 -0
- data/lib/pubid/sae/builder.rb +32 -0
- data/lib/pubid/sae/components/code.rb +9 -0
- data/lib/pubid/sae/components/date.rb +19 -0
- data/lib/pubid/sae/components/type.rb +19 -0
- data/lib/pubid/sae/components.rb +11 -0
- data/lib/pubid/sae/identifier.rb +37 -0
- data/lib/pubid/sae/identifiers/base.rb +42 -0
- data/lib/pubid/sae/identifiers.rb +9 -0
- data/lib/pubid/sae/parser.rb +55 -0
- data/lib/pubid/sae/scheme.rb +47 -0
- data/lib/pubid/sae/urn_generator.rb +38 -0
- data/lib/pubid/sae.rb +19 -0
- data/lib/pubid/scheme.rb +219 -0
- data/lib/pubid/urn_generator/base.rb +110 -0
- data/lib/pubid/utils/string_normalizer.rb +196 -0
- data/lib/pubid/utils.rb +7 -0
- data/lib/pubid/version.rb +3 -1
- data/lib/pubid.rb +137 -13
- data/lib/tasks/docs.rake +37 -0
- data/lib/tasks/export.rake +38 -0
- data/lib/tasks/website-data.json +7488 -0
- metadata +616 -171
- data/lib/pubid/registry.rb +0 -30
|
@@ -0,0 +1,1226 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "parslet"
|
|
4
|
+
|
|
5
|
+
module Pubid
|
|
6
|
+
module Ieee
|
|
7
|
+
# Parser class for IEEE identifiers
|
|
8
|
+
# Single Responsibility: Parsing IEEE identifier syntax
|
|
9
|
+
# Note: IEEE identifier syntax is extremely complex, with many edge cases
|
|
10
|
+
class Parser < Parslet::Parser
|
|
11
|
+
# Basic building blocks
|
|
12
|
+
rule(:space) { str(" ") }
|
|
13
|
+
rule(:space?) { space.maybe }
|
|
14
|
+
rule(:dash) { str("-") }
|
|
15
|
+
rule(:dash?) { dash.maybe }
|
|
16
|
+
rule(:dot) { str(".") }
|
|
17
|
+
rule(:slash) { str("/") }
|
|
18
|
+
rule(:comma) { str(", ") }
|
|
19
|
+
rule(:digit) { match("[0-9]") }
|
|
20
|
+
rule(:digits) { digit.repeat(1) }
|
|
21
|
+
rule(:letter) { match("[A-Za-z]") }
|
|
22
|
+
rule(:upper) { match("[A-Z]") }
|
|
23
|
+
rule(:lower) { match("[a-z]") }
|
|
24
|
+
|
|
25
|
+
# Year pattern (4 digits starting with 19 or 20), optionally followed by letter(s)
|
|
26
|
+
# e.g. 2012, 2010a, 2010ab (a placeholder year such as "201x" does NOT match: four digits are required)
|
|
27
|
+
rule(:year_digits) do
|
|
28
|
+
(str("19") | str("20")) >> digit.repeat(2,
|
|
29
|
+
2) >> lower.repeat(0,
|
|
30
|
+
2) >> digits.absent?
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Month patterns - numeric format (01-12)
|
|
34
|
+
rule(:month_numeric) do
|
|
35
|
+
(str("0") >> match("[1-9]")) | # 01-09
|
|
36
|
+
(str("1") >> match("[0-2]")) # 10-12
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Comprehensive date parsing
|
|
40
|
+
# Format 1: "September 2018" or "Sept 2018" (text month + year)
|
|
41
|
+
rule(:date_with_month_text) do
|
|
42
|
+
month_name.as(:month) >> space >> year_digits.as(:year)
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Format 2: "2018-09" (year-numeric month)
|
|
46
|
+
rule(:date_with_month_numeric) do
|
|
47
|
+
year_digits.as(:year) >> dash >> month_numeric.as(:month)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Format 3: Just year "2018"
|
|
51
|
+
rule(:date_year_only) do
|
|
52
|
+
year_digits.as(:year)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# Combined date rule - longest match first
|
|
56
|
+
rule(:date_standalone) do
|
|
57
|
+
date_with_month_text | date_with_month_numeric | date_year_only
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Month patterns
|
|
61
|
+
rule(:month_name) do
  # Month name in any of three spellings: period-suffixed abbreviation,
  # full name, or bare abbreviation.
  #
  # Parslet alternation is ordered choice: the first alternative that
  # matches wins and later ones are never retried, even if the enclosing
  # sequence fails afterwards. Any literal that is a prefix of another
  # literal must therefore be listed AFTER the longer one.

  # Period-suffixed abbreviations (longest first)
  str("Sept.") | str("Oct.") | str("Nov.") | str("Dec.") |
    str("Jan.") | str("Feb.") | str("Mar.") | str("Apr.") |
    str("Jun.") | str("Jul.") | str("Aug.") |
    # Full month names (before the bare abbreviations they start with)
    str("January") | str("February") | str("March") | str("April") |
    str("May") | str("June") | str("July") | str("August") |
    str("September") | str("October") | str("November") | str("December") |
    # Non-period abbreviations. "Sept" must precede "Sep": otherwise
    # "Sept 2018" matches "Sep" and the following space fails on "t".
    str("Jan") | str("Feb") | str("Mar") | str("Apr") | str("Jun") |
    str("Jul") | str("Aug") | str("Sept") | str("Sep") | str("Oct") | str("Nov") | str("Dec")
end
|
|
74
|
+
|
|
75
|
+
# Organizations
|
|
76
|
+
rule(:organization) do
|
|
77
|
+
str("IEEE") | str("AIEE") | str("ANSI") | str("ASA") |
|
|
78
|
+
str("IEC") | str("ISO") | str("ASTM") | str("CSA") | str("ASME") |
|
|
79
|
+
str("NACE") | str("NSF") | str("ASHRAE") | str("NCTA") | str("AESC") |
|
|
80
|
+
str("EIA") # NEW Session 224: Add EIA support
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
# Complex organization prefixes (Category 5: ANSI Complex)
|
|
84
|
+
rule(:complex_org_prefix) do
|
|
85
|
+
str("ANSI/IEEE-ANS") | str("ANSI/IEEE") | str("ANSI")
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Characteristic IEEE number patterns (without prefix)
|
|
89
|
+
# These patterns are distinctly IEEE even without "IEEE Std" prefix
|
|
90
|
+
rule(:characteristic_ieee_number) do
  # Number shapes that are recognised without a publisher prefix.
  # Quantifiers are written once each: `repeat` already accepts zero
  # occurrences (so a trailing `.maybe` adds nothing) and `digits` is
  # already `digit.repeat(1)`.

  # C37.xxx series - "C", exactly 2 digits, dot, digits, optional lowercase letters
  (str("C") >> digit.repeat(2, 2) >> dot >> digits >> lower.repeat) |
    # 802.xxx series - "802", dot, digits, optional lowercase letters
    (str("802") >> dot >> digits >> lower.repeat) |
    # "P" followed by digits
    (str("P") >> digits)
end
|
|
99
|
+
|
|
100
|
+
rule(:publisher) do
|
|
101
|
+
complex_org_prefix.as(:publisher) | organization.as(:publisher)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
rule(:copublisher) do
|
|
105
|
+
# Three-way copublisher strings (treat as single unit, longest first)
|
|
106
|
+
str("/ISO/IEC").as(:copublisher) |
|
|
107
|
+
str("/IEC/ISO").as(:copublisher) |
|
|
108
|
+
# Two-way copublishers (original pattern)
|
|
109
|
+
(slash >> space? >> organization.as(:copublisher))
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Conformance document patterns (/Conformance01-2003, /Conformance02-2014)
|
|
113
|
+
# Allow optional space before slash for malformed inputs
|
|
114
|
+
rule(:conformance) do
|
|
115
|
+
(space? >> slash >> str("Conformance") >> match("[0-9]").repeat(1).as(:conf_number) >> dash >> year_digits.as(:conf_year)).as(:conformance)
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
# ASHRAE joint publication patterns (/ASHRAE Guideline 21-2012)
|
|
119
|
+
# Also handles /ASHRAE 21 without "Guideline"
|
|
120
|
+
rule(:ashrae_copub) do
|
|
121
|
+
(slash >> str("ASHRAE") >> space >>
|
|
122
|
+
(str("Guideline") >> space).maybe >>
|
|
123
|
+
digits.as(:ashrae_number) >>
|
|
124
|
+
(dash >> year_digits.as(:ashrae_year)).maybe).as(:ashrae_copub)
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
# IEEE cross-reference patterns (/C62.22.1-1996)
|
|
128
|
+
# References another IEEE standard from a specific series (e.g., C62, C37, C57)
|
|
129
|
+
rule(:ieee_crossref) do
|
|
130
|
+
(slash >> str("C") >> digits >> dot >> digits >> dot >> digits >> dash >> year_digits).as(:ieee_crossref)
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Document number - support letters and digits, with optional prefix P
|
|
134
|
+
# Complex multi-part numbers like P11073-10404-10419 should be fully captured
|
|
135
|
+
# But simple cases like "623-1976" should not consume the dash before year
|
|
136
|
+
rule(:number) do
  # Document number, captured as :number.
  #
  # Shape: optional "P", then one or more runs of digits or single
  # uppercase letters, then optional dash-separated digit segments, then
  # an optional single lowercase letter.
  #
  # A dash-separated segment is consumed only when the text right after
  # the dash is not a year (see year_digits). The negative lookahead must
  # come BEFORE `digits`: placed after it, the lookahead inspects whatever
  # follows the segment, so "623-1976" was swallowed whole and no year was
  # left for the year rules.
  #   "P11073-10404-10419" -> whole string is the number
  #   "623-1976"           -> number is "623"; "-1976" is left unconsumed
  (str("P").maybe >>
    (digits | upper).repeat(1) >>
    (dash >> year_digits.absent? >> digits >>
      (dash >> year_digits.absent? >> digits).repeat).maybe >>
    lower.maybe).as(:number)
end
|
|
145
|
+
|
|
146
|
+
# Type - handle "No." and "No" (case-insensitive), longest first
|
|
147
|
+
rule(:type_word) do
  # Document type keyword. Alternatives are tried in order, so longer
  # literals come before the literals they start with
  # ("Standard"/"Std No." before "Std", "No." before "No").
  str("Draft Std") | str("STD") | str("Standard") |
    str("Std No.") | str("Std") |
    str("PTC") |
    # "No." / "no." and "No" / "no" - only the first letter is
    # case-insensitive. A separate str("No") alternative would be
    # unreachable, since [Nn]o already covers it.
    (match("[Nn]") >> str("o.")) | (match("[Nn]") >> str("o"))
end
|
|
154
|
+
|
|
155
|
+
# Part and subpart - handle both dot and dash separators
|
|
156
|
+
rule(:part) do
|
|
157
|
+
(dot | dash) >> match("[0-9A-Za-z]").repeat(1).as(:part)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
rule(:subpart) do
|
|
161
|
+
(dot | dash | str("_")) >>
|
|
162
|
+
((str("REV") | str("Rev")).maybe >> match("[0-9a-z]").repeat(1) >>
|
|
163
|
+
(dot >> digits).maybe).as(:subpart)
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
# Year component - updated to use comprehensive date parsing
|
|
167
|
+
rule(:year) do
|
|
168
|
+
(dot | dash) >> date_standalone >> str("(E)").maybe
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# Draft patterns
|
|
172
|
+
rule(:draft_status) do
|
|
173
|
+
(str("Active Unapproved") | str("Unapproved") | str("Approved")) >> space
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
rule(:draft_prefix) do
|
|
177
|
+
space? >> (str("/") | str("_") | dash | space)
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
rule(:draft_version) do
|
|
181
|
+
# Enhanced to handle multiple draft notation patterns
|
|
182
|
+
# D is optional to handle /08 style drafts (e.g., IEEE P1052/08)
|
|
183
|
+
(str("D") >> str("IS").absent?).maybe >> # Avoid matching "DIS" (ISO stage)
|
|
184
|
+
(
|
|
185
|
+
# Pattern: D3.1 (decimal with 1-2 digits on each side) - MOST COMMON, put first
|
|
186
|
+
# Also consumes one trailing lowercase letter (e.g. D7.3a); an uppercase suffix (D7.3A, D2.0E) is not consumed by this alternative
|
|
187
|
+
(match("[0-9]").repeat(1,
|
|
188
|
+
2) >> dot >> match("[0-9]").repeat(1,
|
|
189
|
+
2) >> lower.maybe) |
|
|
190
|
+
# Pattern: D.XX (decimal starting with dot) - e.g., D.19
|
|
191
|
+
(dot >> digits) |
|
|
192
|
+
# Pattern: DX+X (plus sign) - e.g., D1+1
|
|
193
|
+
(digits >> str("+") >> digits) |
|
|
194
|
+
# Pattern: DXXXXeYY or DXXXX.eYY (complex) - e.g., D2012.e27
|
|
195
|
+
(digits >> dot.maybe >> str("e") >> digits) |
|
|
196
|
+
# Pattern: D-X or DX or DX-d or DX_letter (original patterns)
|
|
197
|
+
# Handles: D12, D3.0, D043Rev18, suffixes like D15Sept
|
|
198
|
+
(str("-").maybe >> match("[0-9A-Za-z]").repeat(1) >> (str("-d") | (str("_") >> match("[0-9A-Za-z]").repeat(0))).maybe)
|
|
199
|
+
).as(:draft_version)
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
rule(:draft_date) do
|
|
203
|
+
# Enhanced to handle: ", Sept 2008" or " Sept 2008" or ", Month Year"
|
|
204
|
+
((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)) |
|
|
205
|
+
(((space? >> comma >> space?) | space) >> month_name.as(:month) >>
|
|
206
|
+
(
|
|
207
|
+
((space >> digits.as(:day)).maybe >> comma >> year_digits.as(:year)) |
|
|
208
|
+
(comma >> space? >> year_digits.as(:year)) |
|
|
209
|
+
(space >> year_digits.as(:year))
|
|
210
|
+
))
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
# FDIS and similar ISO stage codes without D prefix (Pattern 3)
|
|
214
|
+
# These appear after / in IEEE P identifiers but don't have the D prefix
|
|
215
|
+
# Examples: IEEE P15939/FDIS, IEEE P1234/CDV
|
|
216
|
+
rule(:fdraft) do
|
|
217
|
+
(slash >>
|
|
218
|
+
(str("FDIS") | str("CDV") | str("CD") | str("WD") | str("PWI") | str("NP")) >>
|
|
219
|
+
(
|
|
220
|
+
((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)) | # Month Year
|
|
221
|
+
((comma | space) >> year_digits.as(:year)) # Year only (e.g., /FDIS, 2016)
|
|
222
|
+
).maybe >>
|
|
223
|
+
parenthetical.maybe).as(:fdraft)
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
rule(:draft) do
|
|
227
|
+
(draft_prefix >> draft_version.repeat(1, 2) >>
|
|
228
|
+
(dot >> digits.as(:revision)).maybe >>
|
|
229
|
+
draft_date.maybe).as(:draft)
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
# Edition - enhanced to support IEC formats like "Edition 1.0 2015-03"
|
|
233
|
+
rule(:edition) do
  # Two edition notations:
  #   ", 2015 Edition"                          -> :year
  #   " Edition 1.0 2015-03" / "-Edition 1.0 - 2015"
  #                                             -> :edition, :year, optional :edition_month
  (comma >> year_digits.as(:year) >> str(" Edition")) |
    ((space | dash) >> str("Edition ") >>
      (digits >> dot >> digits).as(:edition) >>
      # " - " must be tried before a single space: ordered choice does not
      # backtrack, so with `space` first the " - " separator could never
      # match (space succeeded, then year_digits failed on "-").
      (str(" - ") | space) >>
      year_digits.as(:year) >>
      (dash >> digit.repeat(2, 2).as(:edition_month)).maybe) # Capture -MM as edition_month
end
|
|
241
|
+
|
|
242
|
+
# Part/subpart/year combinations
|
|
243
|
+
rule(:part_subpart_year) do
|
|
244
|
+
(part >> subpart.repeat(1, 2) >> year) |
|
|
245
|
+
(part >> subpart >> year) |
|
|
246
|
+
(part >> year) |
|
|
247
|
+
(part >> subpart) |
|
|
248
|
+
year |
|
|
249
|
+
part
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
# Corrigendum
|
|
253
|
+
rule(:corrigendum) do
|
|
254
|
+
# Enhanced: Accept space as separator, make separators more flexible
|
|
255
|
+
# Also accept "Corrigendum" as alternative to "Cor"
|
|
256
|
+
((str("_") | slash | dash | space) >>
|
|
257
|
+
(str("Corrigendum") | str("Cor")) >>
|
|
258
|
+
(dash | dot | space).maybe >> # More flexible separator after "Cor"
|
|
259
|
+
space? >> # Add space handling after separator
|
|
260
|
+
digits.as(:cor_number).maybe >>
|
|
261
|
+
((dash | str(":") | space) >> year_digits.as(:cor_year)).maybe).as(:corrigendum)
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
# Amendment
|
|
265
|
+
rule(:amendment) do
|
|
266
|
+
(slash >> str("Amd") >> digits.as(:amd_number) >>
|
|
267
|
+
(dash >> year_digits.as(:amd_year)).maybe).as(:amendment)
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
# Interpretation notation (/INT)
|
|
271
|
+
# Enhanced to support optional year suffix: /INT-1991, /INT 1991
|
|
272
|
+
rule(:interpretation) do
|
|
273
|
+
(slash >> str("INT") >> ((dash | str(":") | space) >> year_digits.as(:int_year)).maybe).as(:interpretation)
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
# Reaffirmed - enhanced to support (R1992) format without space
|
|
277
|
+
rule(:reaffirmed) do
|
|
278
|
+
(
|
|
279
|
+
# Format: "Reaffirmed 1992"
|
|
280
|
+
(str("Reaffirmed ") >> year_digits.as(:year)) |
|
|
281
|
+
# Format: "(R1992)" - parentheses with R prefix (with or without space before)
|
|
282
|
+
(space.maybe >> str("(R") >> year_digits.as(:year) >> str(")"))
|
|
283
|
+
).as(:reaffirmed)
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
# Redline
|
|
287
|
+
rule(:redline) do
|
|
288
|
+
str(" - Redline").as(:redline)
|
|
289
|
+
end
|
|
290
|
+
|
|
291
|
+
# Book nickname (e.g., "[The Orange Book]", "[IEEE Gold Book]")
|
|
292
|
+
rule(:book_nickname) do
|
|
293
|
+
space >> str("[") >> match("[^\\]]").repeat(1).as(:nickname) >> str("]")
|
|
294
|
+
end
|
|
295
|
+
|
|
296
|
+
# Relationship type keywords for Pattern 4 identifiers
|
|
297
|
+
rule(:relationship_revision_of) do
|
|
298
|
+
str("Revision of ") | str("Revison of ")
|
|
299
|
+
end
|
|
300
|
+
rule(:relationship_amendment_to) { str("Amendment to ") }
|
|
301
|
+
rule(:relationship_corrigendum_to) do
|
|
302
|
+
str("Corrigendum to ") | str("Corrigenda to ")
|
|
303
|
+
end
|
|
304
|
+
rule(:relationship_incorporates) do
|
|
305
|
+
str("incorporates ") | str("Incorporating ") | str("Incorporates ")
|
|
306
|
+
end
|
|
307
|
+
rule(:relationship_adoption_of) { str("Adoption of ") }
|
|
308
|
+
rule(:relationship_supplement_to) { str("Supplement to ") }
|
|
309
|
+
rule(:relationship_draft_amendment) do
|
|
310
|
+
str("Draft Amendment to ") | str("DRAFT Amendment to ")
|
|
311
|
+
end
|
|
312
|
+
rule(:relationship_draft_revision) { str("Draft Revision of ") }
|
|
313
|
+
rule(:relationship_reaffirmation) { str("Reaffirmation of ") }
|
|
314
|
+
rule(:relationship_redesignation) do
|
|
315
|
+
str("Redesignation of ") | str("redesignated as ")
|
|
316
|
+
end
|
|
317
|
+
rule(:relationship_supersedes) { str("Supersedes ") | str("Supercedes ") }
|
|
318
|
+
rule(:relationship_previously_designated) do
|
|
319
|
+
str("Previously designated as ")
|
|
320
|
+
end
|
|
321
|
+
rule(:relationship_includes) { str("Includes ") } # NEW Session 171
|
|
322
|
+
|
|
323
|
+
# Combined relationship type (longest match first)
|
|
324
|
+
rule(:relationship_type) do
|
|
325
|
+
relationship_draft_amendment.as(:draft_amendment_to) |
|
|
326
|
+
relationship_draft_revision.as(:draft_revision_of) |
|
|
327
|
+
relationship_previously_designated.as(:previously_designated_as) |
|
|
328
|
+
relationship_reaffirmation.as(:reaffirmation_of) |
|
|
329
|
+
relationship_redesignation.as(:redesignation_of) |
|
|
330
|
+
relationship_supersedes.as(:supersedes) |
|
|
331
|
+
relationship_includes.as(:includes) | # NEW Session 171
|
|
332
|
+
relationship_revision_of.as(:revision_of) |
|
|
333
|
+
relationship_amendment_to.as(:amendment_to) |
|
|
334
|
+
relationship_corrigendum_to.as(:corrigendum_to) |
|
|
335
|
+
relationship_incorporates.as(:incorporates) |
|
|
336
|
+
relationship_adoption_of.as(:adoption_of) |
|
|
337
|
+
relationship_supplement_to.as(:supplement_to)
|
|
338
|
+
end
|
|
339
|
+
|
|
340
|
+
# Identifier string (for parsing list of related identifiers)
|
|
341
|
+
# Captures text until delimiter: comma, closing paren, "and", " / ", "; ", "as amended by"
|
|
342
|
+
# Uses absent? to ensure we stop at these delimiters
|
|
343
|
+
rule(:identifier_string) do
|
|
344
|
+
(
|
|
345
|
+
str(", and ").absent? >>
|
|
346
|
+
str(" and ").absent? >>
|
|
347
|
+
str(", ").absent? >>
|
|
348
|
+
str(" as amended by ").absent? >>
|
|
349
|
+
str(" / ").absent? >>
|
|
350
|
+
str("; ").absent? >>
|
|
351
|
+
str(")").absent? >>
|
|
352
|
+
match(".")
|
|
353
|
+
).repeat(1)
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
# Identifier list (comma and "and" separated)
|
|
357
|
+
rule(:identifier_list) do
|
|
358
|
+
identifier_string.as(:id) >>
|
|
359
|
+
(
|
|
360
|
+
(str(", and ") | str(" and ") | str(", ")) >>
|
|
361
|
+
identifier_string.as(:id)
|
|
362
|
+
).repeat
|
|
363
|
+
end
|
|
364
|
+
|
|
365
|
+
# "as amended by" clause with identifier list
|
|
366
|
+
rule(:as_amended_by_clause) do
|
|
367
|
+
# Variant 1: "as amended by IEEE's X, Y, Z"
|
|
368
|
+
(str(" as amended by IEEE's ") >> identifier_list.as(:amendments)) |
|
|
369
|
+
# Variant 2: "as amended by X, Y, Z" (standard)
|
|
370
|
+
(str(" as amended by ") >> identifier_list.as(:amendments)) |
|
|
371
|
+
# Variant 3: "and its approved amendments" (no specific list)
|
|
372
|
+
str(" and its approved amendments").as(:approved_amendments)
|
|
373
|
+
end
|
|
374
|
+
|
|
375
|
+
# Relationship clause (handles all relationship types)
|
|
376
|
+
rule(:relationship_clause) do
|
|
377
|
+
space.maybe >> str("(") >>
|
|
378
|
+
relationship_type.as(:relationship_type) >>
|
|
379
|
+
identifier_list.as(:related_ids) >>
|
|
380
|
+
as_amended_by_clause.maybe >>
|
|
381
|
+
# Handle multiple relationships separated by " / " OR "; "
|
|
382
|
+
(
|
|
383
|
+
(str(" / ") | str("; ")) >> # Support both separators
|
|
384
|
+
relationship_type.as(:relationship_type) >>
|
|
385
|
+
identifier_list.as(:related_ids) >>
|
|
386
|
+
as_amended_by_clause.maybe
|
|
387
|
+
).repeat.as(:additional_rels) >>
|
|
388
|
+
str(")")
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
# Title portion separated by colon (Category 8)
|
|
392
|
+
rule(:title_portion) do
|
|
393
|
+
str(":") >> space >> match('[^\n]').repeat(1).as(:title)
|
|
394
|
+
end
|
|
395
|
+
|
|
396
|
+
# Approved Draft suffix (Category 7)
|
|
397
|
+
rule(:approved_draft_suffix) do
|
|
398
|
+
(space >> str("- (Approved Draft)")) | (space >> str("(Approved Draft)"))
|
|
399
|
+
end
|
|
400
|
+
|
|
401
|
+
# Additional parameters (inside parentheses)
|
|
402
|
+
rule(:additional_parameters) do
|
|
403
|
+
(space.maybe >> str("(") >> # Make space before '(' optional
|
|
404
|
+
(reaffirmed |
|
|
405
|
+
# Handle "Revision of IEEE Std ..." with optional space after Std
|
|
406
|
+
(str("Revision of IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:revision_of)) |
|
|
407
|
+
# Handle typo "Revison of IEEE Std ..." with optional space after Std
|
|
408
|
+
(str("Revison of IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:revision_of)) |
|
|
409
|
+
# Handle "Revision to IEEE Std ..." with optional space after Std
|
|
410
|
+
(str("Revision to IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:revision_of)) |
|
|
411
|
+
# Handle "Revison to IEEE Std ..." with optional space after Std
|
|
412
|
+
(str("Revison to IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:revision_of)) |
|
|
413
|
+
# Amendment patterns (case-insensitive DRAFT)
|
|
414
|
+
((str("DRAFT") | str("Draft") | str("draft")) >> str(" Amendment to ") >> match("[^)]").repeat(1).as(:draft_amendment_to)) |
|
|
415
|
+
(str("Amendment to IEEE Std ") >> space.maybe >> match("[^)]").repeat(1).as(:amendment_to)) |
|
|
416
|
+
# Adoption patterns
|
|
417
|
+
(str("Adoption of ") >> match("[^)]").repeat(1).as(:adoption)) |
|
|
418
|
+
# Other specific patterns
|
|
419
|
+
(str("Notebooks") >> space? >> match("[^,\\)]").repeat(1).as(:notebooks)) |
|
|
420
|
+
(str("Standard Newspaper(s)") >> space? >> match("[^,\\)]").repeat(1).as(:standard_newspapers)) |
|
|
421
|
+
# Catch-all for any other parenthetical content (MUST BE LAST)
|
|
422
|
+
match("[^)]").repeat(1).as(:parenthetical_content)
|
|
423
|
+
) >>
|
|
424
|
+
str(")").maybe).as(:parameters)
|
|
425
|
+
end
|
|
426
|
+
|
|
427
|
+
# Parenthetical - try relationship_clause first, then fall back to additional_parameters
|
|
428
|
+
rule(:parenthetical) do
|
|
429
|
+
relationship_clause | additional_parameters
|
|
430
|
+
end
|
|
431
|
+
|
|
432
|
+
# IEC/IEEE copublished pattern - handle all variations comprehensively
|
|
433
|
+
# BUT exclude P prefix patterns (those are joint development)
|
|
434
|
+
rule(:iec_ieee_copublished) do
|
|
435
|
+
str("IEC/IEEE") >>
|
|
436
|
+
space >>
|
|
437
|
+
str("P").absent? >> # NOT a P prefix (would be joint development)
|
|
438
|
+
match("[^\n]").repeat(1).as(:content)
|
|
439
|
+
end
|
|
440
|
+
|
|
441
|
+
# Joint development patterns (ISO/IEC/IEEE in either IEEE or ISO format)
|
|
442
|
+
rule(:joint_development_ieee_format) do
  # Joint-development identifier written in IEEE style (P-number), e.g.
  #   ISO/IEC/IEEE P26511/D8-2018, ISO/IEEE P1003.1-2008, IEC/IEEE P62582-1-2011
  #   IEC/IEEE P60780-323, CDV1   (comma before stage code)
  #   IEEE/CSA P844.1/293.1/D2    (CSA dual numbering)
  # Captures :joint_publishers, :number, and optionally :part,
  # :draft_version, :iec_stage, :stage_iteration, :month, :year.
  (str("ISO/IEC/IEEE") | str("ISO/IEEE") | str("IEC/IEEE") | str("IEEE/CSA")).as(:joint_publishers) >>
    space >>
    str("P") >>
    digits.as(:number) >>
    ((dot | dash) >> digits.as(:part)).maybe >> # Optional part like .1 or -1
    # CSA dual numbering: /293.1 (second number)
    (slash >> digits >> (dot >> digits).maybe >> (dash >> digits.as(:draft_version)).maybe).maybe >>
    (
      # Variant 1: /D8 notation
      (slash >> str("D") >> digits.as(:draft_version)) |
      # Variant 2: ", CDV1" notation (comma before stage code)
      (comma >> (str("CDV") | str("FDIS") | str("CD") | str("DIS")).as(:iec_stage) >> digits.maybe.as(:stage_iteration))
    ).maybe >>
    ((dash >> year_digits.as(:year)) | # Either -YEAR
      # Or "Month YEAR", optionally preceded by a comma. The `comma` rule
      # is ", " and already contains the space, so `comma.maybe >> space`
      # demanded two spaces after a comma and ", March 2014" could not
      # match. Accept ", ", ",  " or a single space before the month.
      (((comma >> space?) | space) >> month_name.as(:month) >> space.maybe >> year_digits.as(:year))).maybe
end
|
|
462
|
+
|
|
463
|
+
rule(:joint_development_iso_format) do
|
|
464
|
+
# ISO/IEC/IEEE FDIS 26511:2018 (ISO-led format)
|
|
465
|
+
(str("ISO/IEC/IEEE") | str("ISO/IEEE") | str("IEC/IEEE")).as(:joint_publishers) >>
|
|
466
|
+
space >>
|
|
467
|
+
# ISO stage codes
|
|
468
|
+
(str("FDIS") | str("DIS") | str("CD") | str("WD") | str("PWI") | str("NP")).as(:iso_stage) >>
|
|
469
|
+
space >>
|
|
470
|
+
digits.as(:number) >>
|
|
471
|
+
((dot | dash) >> digits.as(:part)).maybe >> # Optional part
|
|
472
|
+
(str(":") >> year_digits.as(:year)).maybe
|
|
473
|
+
end
|
|
474
|
+
|
|
475
|
+
# Number-first pattern: "1873-2015 IEEE Standard..."
|
|
476
|
+
rule(:number_first_identifier) do
|
|
477
|
+
number >>
|
|
478
|
+
(dash >> year_digits.as(:year)).maybe >>
|
|
479
|
+
space >>
|
|
480
|
+
(publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >>
|
|
481
|
+
space >>
|
|
482
|
+
(type_word.as(:type) >> space?).maybe >>
|
|
483
|
+
match("[^\n]").repeat(0).as(:title)
|
|
484
|
+
end
|
|
485
|
+
|
|
486
|
+
# IEEE P pattern (without Std): "IEEE P1003.1..." OR just "P1003.1..." (prefix optional)
|
|
487
|
+
rule(:ieee_p_identifier) do
|
|
488
|
+
(str("IEEE").as(:publisher) >> space).maybe >> # Make IEEE prefix optional
|
|
489
|
+
str("P") >> space.maybe >> # Make space after P optional
|
|
490
|
+
number >>
|
|
491
|
+
(part_subpart_year | edition).maybe >>
|
|
492
|
+
# Pattern for /08 style drafts (digits without D prefix) - MUST come before corrigendum
|
|
493
|
+
(slash >> digits.as(:draft_version)).as(:digit_draft).maybe >>
|
|
494
|
+
# FDIS and other ISO stage codes without D prefix (Pattern 3)
|
|
495
|
+
fdraft.maybe >>
|
|
496
|
+
# Enhanced: Accept both comma and space before month/year
|
|
497
|
+
((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
|
|
498
|
+
corrigendum.maybe >>
|
|
499
|
+
draft.maybe >>
|
|
500
|
+
# ALSO accept month/year after draft (some patterns like /DX, Month YEAR)
|
|
501
|
+
((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
|
|
502
|
+
parenthetical.maybe
|
|
503
|
+
end
|
|
504
|
+
|
|
505
|
+
# ANSI P pattern: "ANSI PN42.34-D9a, 2015" OR "ANSI P1234/D5"
|
|
506
|
+
rule(:ansi_p_identifier) do
|
|
507
|
+
str("ANSI").as(:publisher) >> space >>
|
|
508
|
+
str("P") >> space.maybe >> # Make space after P optional
|
|
509
|
+
number >>
|
|
510
|
+
(part_subpart_year | edition).maybe >>
|
|
511
|
+
# Enhanced: Accept both comma and space before month/year
|
|
512
|
+
((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
|
|
513
|
+
corrigendum.maybe >>
|
|
514
|
+
draft.maybe >>
|
|
515
|
+
# ALSO accept month/year after draft
|
|
516
|
+
((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
|
|
517
|
+
# Accept bare year after draft: ", 2015"
|
|
518
|
+
((comma | space) >> year_digits.as(:year)).maybe >>
|
|
519
|
+
parenthetical.maybe
|
|
520
|
+
end
|
|
521
|
+
|
|
522
|
+
# IEEE Draft P pattern: "IEEE Draft P802.11..." OR "Draft P802.11..." (IEEE prefix optional)
|
|
523
|
+
rule(:ieee_draft_p_identifier) do
|
|
524
|
+
(str("IEEE").as(:publisher) >> space).maybe >> # Make IEEE prefix optional
|
|
525
|
+
str("Draft") >> space >>
|
|
526
|
+
str("P") >>
|
|
527
|
+
number >>
|
|
528
|
+
(part_subpart_year | edition).maybe >>
|
|
529
|
+
# Enhanced: Accept month/year after draft number
|
|
530
|
+
(space >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
|
|
531
|
+
draft.maybe >>
|
|
532
|
+
parenthetical.maybe
|
|
533
|
+
end
|
|
534
|
+
|
|
535
|
+
# IEEE Approved Draft pattern: "IEEE Approved Draft Std P..."
|
|
536
|
+
rule(:ieee_approved_draft_identifier) do
|
|
537
|
+
str("IEEE").as(:publisher) >>
|
|
538
|
+
space >>
|
|
539
|
+
str("Approved") >> space >>
|
|
540
|
+
(str("Draft Std") | str("Std")).as(:type) >> space >>
|
|
541
|
+
str("P").maybe >>
|
|
542
|
+
number >>
|
|
543
|
+
(part_subpart_year | edition).maybe >>
|
|
544
|
+
draft.maybe >>
|
|
545
|
+
parenthetical.maybe
|
|
546
|
+
end
|
|
547
|
+
|
|
548
|
+
# Combined AIEE identifier pattern: "AIEE No 72-1932 and AIEE No 73-1932"
|
|
549
|
+
# Handles "and"-separated AIEE identifiers (from "Nos X and Y" preprocessing)
|
|
550
|
+
rule(:combined_aiee_identifier) do
|
|
551
|
+
# First AIEE identifier
|
|
552
|
+
Aiee::Parser.new.aiee_identifier.as(:first_aiee) >>
|
|
553
|
+
# "and" separator
|
|
554
|
+
space >> str("and") >> space >>
|
|
555
|
+
# Second AIEE identifier
|
|
556
|
+
Aiee::Parser.new.aiee_identifier.as(:second_aiee)
|
|
557
|
+
end
|
|
558
|
+
|
|
559
|
+
# AIEE (American Institute of Electrical Engineers) patterns
|
|
560
|
+
# Detect AIEE patterns and delegate to AIEE parser
|
|
561
|
+
rule(:aiee_identifier) do
|
|
562
|
+
# Lookahead for AIEE patterns - do not consume input
|
|
563
|
+
(
|
|
564
|
+
# IEEE-AIEE transitional pattern
|
|
565
|
+
(str("IEEE-AIEE") >> space >> (str("No.") | str("Nos") | str("No") | str("Standard") | str("Trans."))) |
|
|
566
|
+
# A.I.E.E. pattern (with dots, no spaces)
|
|
567
|
+
(str("A.I.E.E.") >> space >> (str("No.") | str("Nos") | str("No"))) |
|
|
568
|
+
# A. I. E. E. pattern (with dots and spaces)
|
|
569
|
+
(str("A. I. E. E.") >> space >> (str("No.") | str("Nos") | str("No") | str("Standard"))) |
|
|
570
|
+
# AIEE pattern - extended to include more type words
|
|
571
|
+
(str("AIEE") >> space >> (str("No.") | str("Nos") | str("No") | str("Standard") | str("Trans.") | str("Std")))
|
|
572
|
+
).present? >>
|
|
573
|
+
# Delegate to AIEE parser if pattern detected
|
|
574
|
+
Aiee::Parser.new.aiee_identifier.as(:aiee)
|
|
575
|
+
end
|
|
576
|
+
|
|
577
|
+
# IRE (Institute of Radio Engineers) patterns
|
|
578
|
+
# Detect IRE patterns and delegate to IRE parser
|
|
579
|
+
rule(:ire_identifier) do
|
|
580
|
+
# Lookahead for IRE patterns - do not consume input
|
|
581
|
+
(
|
|
582
|
+
# Year-first pattern: "52 IRE 7.S2" or "60 IRE 28 PS7"
|
|
583
|
+
((match("[1-6]") >> digit >> space >> str("IRE")) | # 2-digit year format
|
|
584
|
+
(str("19") >> digit.repeat(2, 2) >> space >> str("IRE"))) |
|
|
585
|
+
# IEEE-IRE transitional pattern
|
|
586
|
+
(str("IEEE-IRE") >> space)
|
|
587
|
+
).present? >>
|
|
588
|
+
# Delegate to IRE parser if pattern detected
|
|
589
|
+
Ire::Parser.new.ire_identifier.as(:ire)
|
|
590
|
+
end
|
|
591
|
+
|
|
592
|
+
# NESC (National Electrical Safety Code) patterns
|
|
593
|
+
# Detect NESC patterns and delegate to NESC parser
|
|
594
|
+
rule(:nesc_identifier) do
|
|
595
|
+
# Lookahead for NESC patterns - do not consume input
|
|
596
|
+
(
|
|
597
|
+
# C2-YYYY pattern
|
|
598
|
+
(str("C2-") >> year_digits) |
|
|
599
|
+
# YYYY NESC pattern
|
|
600
|
+
(year_digits >> space >> (str("NESC") | str("National Electrical Safety Code"))) |
|
|
601
|
+
# Draft NESC pattern
|
|
602
|
+
(str("Draft") >> space >> (str("NESC") | str("National Electrical Safety Code"))) |
|
|
603
|
+
# Name-first pattern (NEW)
|
|
604
|
+
(str("National Electrical Safety Code") >> str(",") >> space >> str("C2-"))
|
|
605
|
+
).present? >>
|
|
606
|
+
# Delegate to NESC parser if pattern detected
|
|
607
|
+
Nesc::Parser.new.nesc_identifier.as(:nesc)
|
|
608
|
+
end
|
|
609
|
+
|
|
610
|
+
# IEEE/ASTM SI/PSI (Système International) patterns
|
|
611
|
+
# SI = Published metric system standard
|
|
612
|
+
# PSI = Proposed SI (draft)
|
|
613
|
+
rule(:ieee_astm_si_psi) do
|
|
614
|
+
str("IEEE/ASTM").as(:publishers) >>
|
|
615
|
+
space >>
|
|
616
|
+
(str("PSI") | str("SI")).as(:si_type) >>
|
|
617
|
+
space >>
|
|
618
|
+
digits.as(:number) >>
|
|
619
|
+
# Draft notation for PSI (e.g., /D2, /D3)
|
|
620
|
+
(slash >> str("D") >> digits.as(:draft_version)).maybe >>
|
|
621
|
+
# Year with optional month
|
|
622
|
+
(
|
|
623
|
+
# Format: ", Month Year"
|
|
624
|
+
(comma >> month_name.as(:month) >> space >> year_digits.as(:year)) |
|
|
625
|
+
# Format: "-YEAR"
|
|
626
|
+
(dash >> year_digits.as(:year))
|
|
627
|
+
).maybe >>
|
|
628
|
+
# Optional parenthetical (revision relationships)
|
|
629
|
+
parenthetical.maybe
|
|
630
|
+
end
|
|
631
|
+
|
|
632
|
+
# No-prefix IEEE identifier (characteristic patterns without "IEEE Std")
|
|
633
|
+
# These are patterns that are distinctly IEEE even without explicit publisher
|
|
634
|
+
rule(:no_prefix_ieee) do
|
|
635
|
+
characteristic_ieee_number.as(:number) >>
|
|
636
|
+
# Optional suffix (like -a, -b)
|
|
637
|
+
(dash >> match("[A-Za-z]")).maybe.as(:suffix) >>
|
|
638
|
+
# Optional year
|
|
639
|
+
(dash >> year_digits).maybe.as(:year) >>
|
|
640
|
+
# Optional draft notation
|
|
641
|
+
draft.maybe >>
|
|
642
|
+
# Optional language portion
|
|
643
|
+
(str("(E)") | str("(F)")).maybe >>
|
|
644
|
+
# Optional parenthetical content
|
|
645
|
+
parenthetical.maybe
|
|
646
|
+
end
|
|
647
|
+
|
|
648
|
+
# Corrigendum identifier with recursive base parsing
|
|
649
|
+
# Captures base identifier for recursive parsing, then corrigendum supplement
|
|
650
|
+
# Example: IEEE Std 535-2013/Cor. 1-2017
|
|
651
|
+
rule(:corrigendum_identifier) do
|
|
652
|
+
# Match a complete base identifier (reuse existing patterns)
|
|
653
|
+
# Try standard patterns that would match "IEEE Std 535-2013"
|
|
654
|
+
(
|
|
655
|
+
((publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >> space).maybe >>
|
|
656
|
+
(type_word.as(:type) >> space?).maybe >>
|
|
657
|
+
number >>
|
|
658
|
+
part_subpart_year.maybe # This captures the full identifier before /Cor
|
|
659
|
+
).as(:base_identifier) >>
|
|
660
|
+
# Now match the corrigendum portion
|
|
661
|
+
(slash | dash | space) >>
|
|
662
|
+
str("Cor") >>
|
|
663
|
+
(dash | dot | space).maybe >> # More flexible separator after "Cor"
|
|
664
|
+
space? >>
|
|
665
|
+
digits.as(:cor_number) >>
|
|
666
|
+
((dash | str(":") | space) >> year_digits.as(:cor_year)).maybe >> # Optional cor year suffix
|
|
667
|
+
parenthetical.maybe
|
|
668
|
+
end
|
|
669
|
+
|
|
670
|
+
# Interpretation identifier with recursive base parsing
|
|
671
|
+
# Captures base identifier for recursive parsing, then interpretation supplement
|
|
672
|
+
# Example: IEEE Std 1076/INT-1991, IEEE Std 1003.1-1988/INT
|
|
673
|
+
rule(:interpretation_identifier) do
  # Shape: <base identifier> <sep> "INT" [<sep> <year>] [parenthetical]
  # e.g. "IEEE Std 1076/INT-1991", "IEEE Std 1003.1-1988/INT".
  publishers_part = ((publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >> space).maybe
  type_part       = (type_word.as(:type) >> space?).maybe

  # The text ahead of the interpretation marker is captured as one
  # :base_identifier subtree.
  base = (publishers_part >> type_part >> number >> part_subpart_year.maybe).as(:base_identifier)

  # Optional interpretation year introduced by "-", ":" or a space.
  year_part = ((dash | str(":") | space) >> year_digits.as(:int_year)).maybe

  base >> (slash | dash | space) >> str("INT") >> year_part >> parenthetical.maybe
end
|
|
687
|
+
|
|
688
|
+
# Conformance identifier with recursive base parsing
|
|
689
|
+
# Captures base identifier for recursive parsing, then conformance supplement
|
|
690
|
+
# Example: IEEE Std 802.16/Conformance01-2003
|
|
691
|
+
rule(:conformance_identifier) do
  # Shape: <base identifier> <sep> "Conformance"<digits>-<year> [parenthetical]
  # e.g. "IEEE Std 802.16/Conformance01-2003".
  publishers_part = ((publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >> space).maybe
  type_part       = (type_word.as(:type) >> space?).maybe

  # The text ahead of the conformance marker is captured as one
  # :base_identifier subtree.
  base = (publishers_part >> type_part >> number >> part_subpart_year.maybe).as(:base_identifier)

  # Conformance number (one or more digits); the dash and year are mandatory here.
  conf_number = match("[0-9]").repeat(1).as(:conf_number)

  base >> (slash | dash | space) >> str("Conformance") >>
    conf_number >> dash >> year_digits.as(:conf_year) >> parenthetical.maybe
end
|
|
707
|
+
|
|
708
|
+
# Multi-numbered identifier: same document with multiple numbers
|
|
709
|
+
# Examples: "IEEE Std 1299/C62.22.1-1996" (slash cross-reference), "IEEE Std 960-1989, Std 1177-1989" (comma-separated joint standard)
|
|
710
|
+
rule(:multi_numbered_identifier) do
  # One document carrying two numbers:
  #   <primary identifier> "/" C<n>.<n>.<n>-<year>     -> :secondary_crossref
  #   <primary identifier> ", " [type] <number>-<year> -> :secondary_joint
  #
  # FIX: the parentheses previously grouped the primary identifier with the
  # cross-reference branch only, i.e. `(primary >> crossref) | joint`. The
  # joint branch therefore had to match at the very start of the input
  # (beginning with a comma) and could never follow a primary identifier.
  # The primary identifier is now shared by both secondary forms.
  primary = (
    (publisher >> space).maybe >>
    (type_word.as(:type) >> space?).maybe >>
    number >>
    (part_subpart_year | edition).maybe
  ).as(:primary_identifier)

  # Slash cross-reference, e.g. "/C62.22.1-1996".
  crossref = (
    slash >> str("C") >> digits >> dot >> digits >> dot >> digits >> dash >> year_digits
  ).as(:secondary_crossref)

  # Comma-separated joint standard, e.g. ", Std 1177-1989".
  joint = (
    comma >> space >> (type_word.as(:type) >> space?).maybe >> number >> dash >> year_digits
  ).as(:secondary_joint)

  primary >> (crossref | joint)
end
|
|
722
|
+
|
|
723
|
+
# CSA dual published pattern: IEEE Std 844.1-2017/CSA C22.2 No. 293.1-17
|
|
724
|
+
rule(:csa_dual_published) do
  # IEEE/CSA dual-published identifier, e.g.
  #   "IEEE Std 844.1-2017/CSA C22.2 No. 293.1-17"
  # The IEEE half is captured as :ieee_portion and the CSA half (after
  # "/CSA ") as :csa_portion.
  ieee_part = (
    publisher >> space >>
    (type_word.as(:type) >> space?).maybe >>
    number >>
    (part_subpart_year | edition).maybe
  ).as(:ieee_portion)

  # "C22.2 No. 293.1-17" or "C22.2 No. 293.3:19": "C", two or more digits,
  # ".", one digit, " No. ", a dotted number, then "-" or ":" and a year of
  # two or more digits.
  # This form accepts both separators, so the former colon-only duplicate
  # of it ("Format 3") could never be reached and has been removed.
  csa_with_no =
    str("C") >> digit.repeat(2) >> dot >> digit >> space >> str("No") >> dot >> space >>
    match("[0-9.]").repeat(1) >> (dash | str(":")) >> digit.repeat(2)

  # "C293.2-17": no "No." part, dash before the year.
  csa_plain_dash = str("C") >> match("[0-9.]").repeat(1) >> dash >> digit.repeat(2)

  # "C293.4:19": no "No." part, colon before the year.
  csa_plain_colon = str("C") >> match("[0-9.]").repeat(1) >> str(":") >> digit.repeat(2)

  ieee_part >> slash >> str("CSA") >> space >>
    (csa_with_no | csa_plain_dash | csa_plain_colon).as(:csa_portion)
end
|
|
749
|
+
|
|
750
|
+
# Top-level identifier rule: an ordered choice of specialised identifier forms, then the generic IEEE pattern, with no_prefix_ieee as the final fallback
|
|
751
|
+
rule(:identifier) do
  # Ordered choice: alternatives are tried top to bottom and the first one
  # that matches wins, so specialised forms are listed before the generic
  # pattern and the publisher-less fallback comes last.
  #
  # FIX: combined_aiee_identifier used to be listed twice (before and after
  # aiee_identifier). The second occurrence could never match, because the
  # same rule had already failed at the same position, so it is dropped.
  combined_aiee_identifier |
    aiee_identifier |
    ire_identifier |
    nesc_identifier |
    ieee_astm_si_psi |              # IEEE/ASTM SI/PSI
    multi_numbered_identifier |     # multi-numbered, before generic patterns
    csa_dual_published |            # IEEE/CSA dual published, before generic patterns
    corrigendum_identifier |        # <base>/Cor ..., before generic patterns
    interpretation_identifier |     # <base>/INT ..., before generic patterns
    conformance_identifier |        # <base>/Conformance ..., before generic patterns
    joint_development_ieee_format |
    joint_development_iso_format |
    iec_ieee_copublished |
    number_first_identifier |
    ieee_approved_draft_identifier |
    ieee_draft_p_identifier |
    ieee_p_identifier |
    ansi_p_identifier |             # ANSI P prefix
    # Generic pattern: optional publisher(s), optional draft status / type
    # word, the number, then a long tail of optional supplements.
    (((publisher >> copublisher.repeat.as(:copublishers)).as(:publishers) >> space).maybe >>
      draft_status.as(:draft_status).maybe >>
      (str("Draft Std").as(:type) >> space?).maybe >>
      (type_word.as(:type) >> (space >> str("No") >> space).maybe >> space?).maybe >>
      number >>
      (part_subpart_year | edition).maybe >>
      corrigendum.maybe >>
      amendment.maybe >>
      interpretation.maybe >>       # /INT
      conformance.maybe >>          # /Conformance
      ashrae_copub.maybe >>         # /ASHRAE Guideline
      ieee_crossref.maybe >>        # /C62.22.1-1996 style cross-reference
      draft.maybe >>
      # Month and year may be introduced by either a comma or a space.
      ((comma | space) >> month_name.as(:month) >> space >> year_digits.as(:year)).maybe >>
      edition.maybe >>
      parenthetical.maybe >>        # a single parenthetical only
      book_nickname.maybe >>
      redline.maybe >>
      title_portion.maybe >>
      approved_draft_suffix.maybe) |
    no_prefix_ieee                  # publisher-less patterns, lowest priority
end
|
|
794
|
+
|
|
795
|
+
# Declare :identifier as the parser's root rule.
root(:identifier)
|
|
796
|
+
|
|
797
|
+
# Normalises a raw identifier string and parses it with a fresh parser
# instance.
#
# The input goes through an ordered pipeline of textual clean-ups (file
# extension, whitespace, HTML entities, known typos, publisher order,
# dual-publication formats, ...) before being handed to the grammar. The
# order of the steps matters: later steps see the output of earlier ones.
#
# @param string [String] raw identifier text
# @return whatever `new.parse` returns for the cleaned string
#   (presumably the Parslet parse tree - confirm against callers)
# @raise whatever `new.parse` raises when the cleaned string does not match
def self.parse(string)
  # --- File extension, separators and whitespace ---------------------------

  # Strip a trailing ".pdf" (case-insensitive).
  cleaned = string.sub(/\.pdf$/i, "")

  # Note: IEC and ANSI identifiers are NOT filtered here because they can have
  # IEEE co-publication or adoption. The Base.parse method handles determining
  # which standards are actually IEEE-related.
  # ISO-only standards are still filtered as they have separate handling.

  # Joint development drafts sometimes use "_" instead of "/" before an ISO
  # stage code.
  cleaned = cleaned.gsub(/_(FDIS|CDV|CD|DIS|WD|PWI|NP)/, '/\1')

  # Collapse every whitespace run (including newlines) to a single space.
  cleaned = cleaned.gsub(/\s+/, " ")

  # Conservative dash/year typo fixes; " - " (space-dash-space) is left alone here.
  cleaned = cleaned.gsub(/(\d)\s+-(\d{4})\b/, '\1-\2') # "C37.101 -2006" -> "C37.101-2006"
  cleaned = cleaned.gsub(/(\d)-\s+(\d{4})\b/, '\1-\2') # "C62.35- 2010" -> "C62.35-2010"

  # --- HTML entities and symbols --------------------------------------------
  # FIX: these substitutions were no-ops as written (e.g. gsub("&", "&")).
  # The entity forms are decoded again, and the literal characters are still
  # accepted.

  # En dash (entity or literal) -> hyphen, without ever producing "--".
  cleaned = cleaned.gsub(/(?:&ndash;|–)(?!-)/, "-")
  cleaned = cleaned.gsub(/(?:&ndash;|–)-/, "-")

  # Stray "!" in front of the publisher.
  cleaned = cleaned.gsub(/^!IEEE /, "IEEE ")

  # "IEEE/ ASTM" -> "IEEE/ASTM".
  cleaned = cleaned.gsub("IEEE/ ASTM", "IEEE/ASTM")

  cleaned = cleaned.gsub("&trade;", "™")       # Trademark entity (the symbol is stripped further down)
  cleaned = cleaned.gsub(/&rsquo;|’/, "'")     # Smart apostrophe
  cleaned = cleaned.gsub("&amp;amp;", "&")     # Double-encoded ampersand
  cleaned = cleaned.gsub("&amp;", "&")         # Single-encoded ampersand

  # "IEEE Std 500-1984 P&V" -> "IEEE Std 500-1984 (P&V)".
  cleaned = cleaned.gsub(/\s+(P&V)\s*$/, ' (\1)')

  # --- Number / year / month spacing ----------------------------------------

  # Space inside a dotted number: "C57.1 2.25" -> "C57.12.25".
  cleaned = cleaned.gsub(/(\d+\.\d+)\s+(\d+\.)/, '\1\2')

  # Space inside a 4-digit year: "1 996" -> "1996".
  cleaned = cleaned.gsub(/\b(1|2)\s+(\d{3})\b/, '\1\2')

  # Month name glued to the year: "March2016" -> "March 2016".
  cleaned = cleaned.gsub(
    /\b(January|February|March|April|May|June|July|August|September|October|November|December)(\d{4})\b/, '\1 \2'
  )
  cleaned = cleaned.gsub(
    /\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)(\d{4})\b/, '\1 \2'
  )

  # "IEC 62539 First Edition 2007-07 IEEE 930" -> "...2007-07; IEEE 930".
  # FIX: the pattern used `First?` (which only makes the final "t" optional)
  # and a case-sensitive "Edition", so the "First edition" spelling shown in
  # the rule's own example was not recognised.
  cleaned = cleaned.gsub(
    /(IEC\s+\d+(?:-\d+)?(?:\s+First\s+[Ee]dition\s+\d{4}-\d{2})?)\s+(IEEE\s+\S+)/, '\1; \2'
  )

  # Remove the literal trademark symbol: "C57.110™-2018" -> "C57.110-2018".
  cleaned = cleaned.gsub(/™/, "")

  # --- Known typos -----------------------------------------------------------

  cleaned = cleaned.gsub(/\b19969\b/, "1969")          # specific year typo
  cleaned = cleaned.gsub(/(\d{4}),(\d{3})/, '\1, \2')  # "802.3ch-2020,802.3ca-2020" -> add the space
  cleaned = cleaned.gsub(/\/lNT\b/, "/INT")            # lowercase L typed for I
  cleaned = cleaned.gsub(".l/", ".1/")                 # lowercase L typed for 1
  cleaned = cleaned.gsub(/\bI99O\b/, "1990")           # letters I/O typed for digits
  cleaned = cleaned.gsub(/^EEE /, "IEEE ")
  cleaned = cleaned.gsub(/^I EEE /, "IEEE ")
  cleaned = cleaned.gsub(/^lEEE /, "IEEE ")

  # --- Parentheses and trailing punctuation ----------------------------------

  # Conservative first pass: exactly one closing paren missing at the end.
  # Kept ahead of the trailing-punctuation clean-up so that the original
  # step order (and therefore the result for inputs such as "... (foo,") is
  # unchanged.
  open_count = cleaned.count("(")
  close_count = cleaned.count(")")
  if open_count == close_count + 1 && !cleaned.end_with?(")")
    cleaned = "#{cleaned})"
  end

  cleaned = cleaned.gsub(/,\s*Standard\s*$/, "")             # ", Standard" at end
  cleaned = cleaned.gsub(/[,:]\s*$/, "")                     # trailing comma/colon
  cleaned = cleaned.gsub(/,\s+and\s+IEEE\s+Std\s/, " and ")  # "IEEE Std ... , and IEEE Std ..." case

  # General pass: append any missing ")" or drop surplus trailing ")".
  open_count = cleaned.count("(")
  close_count = cleaned.count(")")
  if open_count > close_count
    cleaned = cleaned + (")" * (open_count - close_count))
  elsif close_count > open_count
    extra = close_count - open_count
    # Only removes the surplus when it sits at the very end of the string.
    cleaned = cleaned.sub(/\){#{extra}}$/, "")
  end

  # --- Simple normalisations --------------------------------------------------

  # Missing dash before a year that ends the identifier: "802.16g 2007" -> "802.16g-2007".
  cleaned = cleaned.gsub(/(\d)\s+(\d{4})(?=\s*\(|\s*$)/, '\1-\2')

  # Space-dash-space before a year: "802.1ag - 2007" -> "802.1ag-2007".
  cleaned = cleaned.gsub(/\s+-\s+(\d{4})\b/, '-\1')

  # Missing "Std": "IEEE 1070-1995" -> "IEEE Std 1070-1995".
  # (The former `(?!Std\b)` lookahead could never reject a match, because a
  # digit must follow the whitespace; it has been dropped.)
  cleaned = cleaned.gsub(/^IEEE\s+(\d)/, 'IEEE Std \1')

  # "IEEE No. 264-1968" / "IEEE No 264-1968" -> "IEEE Std 264-1968".
  # AIEE is deliberately not converted: AIEE uses "No" as its standard format.
  cleaned = cleaned.gsub(/^IEEE\s+No\.\s*/, "IEEE Std ")
  cleaned = cleaned.gsub(/^IEEE\s+No\s/, "IEEE Std ")

  # Space before a slash: "262-1973 /ANSI" -> "262-1973/ANSI".
  cleaned = cleaned.gsub(/\s+\//, "/")

  # ", 1998 Edition" -> "-1998".
  cleaned = cleaned.gsub(/,\s+(\d{4})\s+Edition/, '-\1')

  # "ISO/IEC15802" -> "ISO/IEC 15802".
  cleaned = cleaned.gsub(/(ISO\/IEC)(\d)/, '\1 \2')

  # Wrong publisher order: "IEEE Std ANSI/IEEE" -> "ANSI/IEEE Std".
  cleaned = cleaned.gsub(/^IEEE\s+Std\s+(ANSI\/IEEE)/, '\1 Std')

  # Dual published with a semicolon:
  # "IEEE Std 120-1955; ASME PTC 19.6-1955" -> "IEEE Std 120-1955 (ASME PTC 19.6-1955)".
  # Only when the semicolon is followed by an upper-case abbreviation.
  if cleaned.match?(/;\s+[A-Z]{2,}/)
    cleaned = cleaned.sub(/;\s+([A-Z][^;]+)$/, ' (\1)')
  end

  # ", 1999 Edn. (Reaff 2003)" -> "-1999 (R2003)" (also without the comma).
  cleaned = cleaned.gsub(/,\s+(\d{4})\s+Edn\.\s+\(Reaff\s+(\d{4})\)/, '-\1 (R\2)')
  cleaned = cleaned.gsub(/(\d{4})\s+Edn\.\s+\(Reaff\s+(\d{4})\)/, '\1 (R\2)')

  # "(Reaffirmed 1980, 56 IRE 28.S2)" -> "(R1980) (56 IRE 28.S2)".
  cleaned = cleaned.gsub(/\(Reaffirmed\s+(\d{4}),\s+(\d+\s+IRE[^)]+)\)/, '(R\1) (\2)')

  # "number-year/ANSI identifier" -> "number-year (ANSI identifier)".
  # FIX: the capture is now lazy, so the space in front of a following
  # parenthetical is no longer pulled inside the new one
  # ("(ANSI C37.1 )(R1980)" -> "(ANSI C37.1) (R1980)").
  cleaned = cleaned.gsub(%r{(\d{4})/ANSI\s+([^(]+?)(?=\s*\(|$)}, '\1 (ANSI \2)')

  # "ISO/IEC TR11802" -> "ISO/IEC TR 11802".
  cleaned = cleaned.gsub(/(ISO\/IEC\s+TR)(\d)/, '\1 \2')

  # "AIEE Nos 72 and 73 - 1932" -> "AIEE No 72-1932 and AIEE No 73-1932".
  # FIX: by this point the space-dash-space rule above has already turned
  # " - 1932" into "-1932", so the spaces around the dash must be optional
  # or this expansion never fires.
  cleaned = cleaned.sub(/AIEE\s+Nos\s+(\d+)\s+and\s+(\d+)\s*-\s*(\d{4})/,
                        'AIEE No \1-\3 and AIEE No \2-\3')

  # --- Further typo / symbol / suffix fixes -----------------------------------

  cleaned = cleaned.gsub(/\bStad\b/, "Std")                       # "Stad" typo
  cleaned = cleaned.gsub(/\b(IEEE|ANSI|AIEE)\s+std\b/, '\1 Std')  # lowercase "std" after a publisher
  cleaned = cleaned.gsub("(TM)", "")                              # textual trademark marker

  # "62704-4/D4, 2020" -> "IEEE P62704-4/D4, 2020" (bare project number with draft).
  if cleaned.match?(/^(\d+[-.]\d+)\/D\d+/)
    cleaned = "IEEE P#{cleaned}"
  end

  cleaned = cleaned.gsub(/\/Preprint\b/, "")                      # not a standard suffix
  cleaned = cleaned.gsub("Proposed Revision of", "Revision of")
  cleaned = cleaned.gsub(/\bammended\b/i, "amended")

  # Trailing period after "/INT" or "/Cor N-YYYY".
  cleaned = cleaned.gsub(/(\/INT|\/Cor\s+\d+-\d{4})\./, '\1')

  # (No "/Conformance" spacing normalisation: the grammar accepts both
  # "6/Conformance01-2003" and "6 /Conformance02".)

  # "1003.1/INT, March 1994 Edition" -> "1003.1/INT, March 1994".
  cleaned = cleaned.gsub(/(\/INT),\s+([A-Z][a-z]+)\s+(\d{4})\s+Edition/, '\1, \2 \3')

  # "Dec. 1994 Ed." -> "Dec. 1994".
  cleaned = cleaned.gsub(/\s+Ed\.\s*$/, "")

  # --- High-impact fixes for fixture failures ----------------------------------

  # "IEEE Std." -> "IEEE Std".
  cleaned = cleaned.gsub(/\bStd\.\s+/, "Std ")

  # Drop " - Redline" and everything after it.
  cleaned = cleaned.gsub(/\s+-\s+Redline\b.*$/, "")

  # Drop a title after the year: "YYYY - IEEE Standard for ...".
  cleaned = cleaned.gsub(
    /(\d{4})(\s+\([^)]+\))?\s+-\s+IEEE\s+Standard\s+for.*$/, '\1\2'
  )

  # "IEEE PC37.20.9/D7.3A": route through the standard identifier path by
  # inserting "Std" (the number rule would otherwise read "PC37" as P + C37).
  cleaned = cleaned.gsub(/^IEEE\s+PC(\d)/, 'IEEE Std PC\1')
  cleaned = cleaned.gsub(/^IEEE\s+Unapproved\s+Draft\s+Std\s+PC(\d)/,
                         'IEEE Unapproved Draft Std PC\1')

  # "IEEE P<n> and ASHRAE" copublication; plain "IEEE P..." is handled by the
  # ieee_p_identifier rule and needs no preprocessing.
  cleaned = cleaned.gsub(/^IEEE\s+P(\d+)\s+and\s+ASHRAE/, 'IEEE Std P\1 and ASHRAE')

  # "ISO/IEC XXXX-YYYY: Title" -> strip the title after the colon.
  cleaned = cleaned.gsub(/^(ISO\/IEC \d+[-.]\d+-\d{4}):.*$/, '\1')
  cleaned = cleaned.gsub(/^(ISO\/IEC \d+-\d{4}):.*$/, '\1')

  # "ISO/IEC XXXX : YYYY" -> "ISO/IEC XXXX:YYYY".
  cleaned = cleaned.gsub(/^(ISO\/IEC \d+[-.]\d*)\s*:\s*(\d{4})/, '\1:\2')
  cleaned = cleaned.gsub(/^(ISO\/IEC \d+)\s*:\s*(\d{4})/, '\1:\2')

  # "IEC/IEEE PXXX_D5" -> "IEC/IEEE PXXX/D5".
  cleaned = cleaned.gsub(/^(IEC\/IEEE P[\w.-]+)_D/, '\1/D')

  # Publisher order for joint projects.
  cleaned = cleaned.gsub(/^IEEE\/ISO\/IEC\s+(P[\w.-]+)/, 'ISO/IEC/IEEE \1')
  cleaned = cleaned.gsub(/^IEEE\/IEC\/ISO\s+(P[\w.-]+)/, 'IEC/ISO/IEEE \1')

  # "IEEE/IEC PXXX D5" / "IEEE/IEC PXXX CDV" -> slash before the draft/stage.
  cleaned = cleaned.gsub(/^(IEEE\/IEC P[\w.-]+)\s+D(\d)/, '\1/D\2')
  cleaned = cleaned.gsub(
    /^(IEEE\/IEC P[\w.-]+)\s+(CDV|FDIS|CD|DIS|ED\d)/, '\1/\2'
  )

  # "ISO /IEC/IEEE" -> "ISO/IEC/IEEE". The generic space-before-slash rule
  # above normally handles this already; kept as a safety net.
  cleaned = cleaned.gsub(/^ISO\s+\/IEC\/IEEE/, "ISO/IEC/IEEE")
  cleaned = cleaned.gsub(/^ISO\s+\/IEC/, "ISO/IEC")

  # "IS0/" (digit zero) -> "ISO/".
  cleaned = cleaned.gsub(/^IS0\//, "ISO/")

  # "IEEE-P15026-3-DIS-January 2015" -> "ISO/IEC/IEEE P15026-3/DIS, January 2015".
  cleaned = cleaned.gsub(/^IEEE-P(\d+)-(\d+)-DIS-(.*)/, 'ISO/IEC/IEEE P\1-\2/DIS, \3')

  # "IEEE/CSA P844.1/293.1/D2" -> "IEEE/CSA P844.1/D2".
  cleaned = cleaned.gsub(/^IEEE\/CSA\s+(P[\d.]+)\/([\d.]+)\/D(\d+)/, 'IEEE/CSA \1/D\3')

  # "IEEE Approved Draft Std P..." spacing.
  cleaned = cleaned.gsub(/^IEEE\s+Approved\s+Draft\s+Std\s+(P\d)/,
                         'IEEE Approved Draft Std \1')
  # "IEEE Approved Draft Std P1234 / D12" -> "...P1234/D12".
  # FIX: the space before the slash has already been removed by the generic
  # space-before-slash rule, so it must be optional here; otherwise the
  # remaining "/ D12" was never repaired.
  cleaned = cleaned.gsub(/^(IEEE Approved Draft Std P[\w.-]+)\s*\/\s*D/, '\1/D')

  # AIEE format variations.
  cleaned = cleaned.gsub(/^AIEE\s+No\.\s*(\d)/, 'AIEE No. \1')           # "AIEE No.1C-1954" -> "AIEE No. 1C-1954"
  cleaned = cleaned.gsub(/^AIEE\s+no\s/, "AIEE No ")                     # "AIEE no 700-1945" -> "AIEE No 700-1945"
  cleaned = cleaned.gsub(/^AIEE\s+Std\s+No\.\s*/, "AIEE Standard No ")   # "AIEE Std No. 800" -> "AIEE Standard No 800"

  # "IEEE PSI 10/D2" -> "IEEE/ASTM PSI 10/D2".
  cleaned = cleaned.gsub(/^IEEE\s+PSI\s+(\d)/, 'IEEE/ASTM PSI \1')

  # "IEEE/IEC P62271-111/PC37.60_D5" -> slash before the draft.
  cleaned = cleaned.gsub(/^(IEEE\/IEC P[\d.-]+\/PC[\d.]+)_D/, '\1/D')

  # "IEC P62271-111/IEEE PC37.60_D5" -> "IEC/IEEE PC37.60/D5".
  cleaned = cleaned.gsub(/^IEC\s+(P[\d.-]+)\/IEEE\s+(PC[\d.]+)_D/, 'IEC/IEEE \2/D')

  # "IEC/IEC P" typo -> "IEC/IEEE P".
  cleaned = cleaned.gsub(/^IEC\/IEC\s+(P\d)/, 'IEC/IEEE \1')

  # "NACE SPXXXX-YYYY/IEEE Std NNNN-YYYY" -> "NACE SPXXXX-YYYY (IEEE Std NNNN-YYYY)".
  cleaned = cleaned.gsub(/^(NACE\s+SP\d+-\d+)\/(IEEE\s+Std\s+\d+-\d+)$/, '\1 (\2)')

  # Any remaining "Edn." -> "Edition" (complex relationships are left to the grammar).
  cleaned = cleaned.gsub("Edn.", "Edition")

  # "ASHARE" typo -> "ASHRAE".
  cleaned = cleaned.gsub("ASHARE", "ASHRAE")

  # Leading "PC<digit>" -> "P<digit>".
  cleaned = cleaned.gsub(/^PC(\d)/, 'P\1')

  # "IEEE/ISO/IEC 8802-..." -> "ISO/IEC/IEEE 8802-...".
  cleaned = cleaned.gsub(/^IEEE\/ISO\/IEC\s+(8802[\w.-]+)/, 'ISO/IEC/IEEE \1')

  # "IEEE C57.139/D14June 2010" -> "IEEE C57.139/D14, June 2010".
  # FIX: digit-first numbers have had "Std" inserted above, so "Std" must
  # be tolerated here for those inputs to be repaired as well.
  cleaned = cleaned.gsub(
    /^(IEEE\s+(?:Std\s+)?C?\d[\d.]*\/D\d+)([A-Z][a-z]+\s+\d{4})/, '\1, \2'
  )

  # "ANSI/IEEE Std: Title" -> "ANSI/IEEE Std".
  cleaned = cleaned.gsub(/^(ANSI\/IEEE Std):\s+.*$/, '\1')

  # "IEEE 1076 IEC 61691-1-1 First edition 2004-10" -> semicolon format.
  # FIX: "IEEE 1076" has become "IEEE Std 1076" by now, so the pattern has
  # to accept the inserted "Std" or it can never match.
  cleaned = cleaned.gsub(
    /^(IEEE\s+(?:Std\s+)?[\d.]+)\s+(IEC\s+\d+[-\d]*\s+.*edition\s+\d{4}-\d{2})$/i, '\1; \2'
  )

  # "IEEE No 29-1941 / ASA C77.1-1943" -> "IEEE Std 29-1941 (ASA C77.1-1943)".
  # FIX: earlier steps have already rewritten "No" to "Std" and removed the
  # space before the slash, so the original pattern (which required both)
  # was unreachable.
  cleaned = cleaned.gsub(/^IEEE\s+(?:No|Std)\s+(\d+-\d+)\s*\/\s*ASA\s+(.*)/,
                         'IEEE Std \1 (ASA \2)')

  new.parse(cleaned)
end
|
|
878
|
+
# Very specific: 4 digits, comma, 3 digits (likely 802.3xx typo)
|
|
879
|
+
cleaned = cleaned.gsub(/(\d{4}),(\d{3})/, '\1, \2')
|
|
880
|
+
|
|
881
|
+
# NEW Session 169: Fix /lNT typo (lowercase L as 1)
|
|
882
|
+
# "1003.1/2003.l/lNT" → "1003.1/2003.1/INT"
|
|
883
|
+
cleaned = cleaned.gsub(/\/lNT\b/, "/INT")
|
|
884
|
+
cleaned = cleaned.gsub(".l/", ".1/") # Also fix .l/ -> .1/
|
|
885
|
+
|
|
886
|
+
# NEW Session 169: Fix I99O typo (letter I and O instead of digits)
|
|
887
|
+
# "IEEE 1076-CONC-I99O" → "IEEE 1076-CONC-1990"
|
|
888
|
+
cleaned = cleaned.gsub(/\bI99O\b/, "1990")
|
|
889
|
+
|
|
890
|
+
# NEW: Fix common typos (Category 9)
|
|
891
|
+
cleaned = cleaned.gsub(/^EEE /, "IEEE ")
|
|
892
|
+
|
|
893
|
+
# NEW Session 170: Additional safe typo fixes
|
|
894
|
+
# Fix "I EEE" (space between I and EEE)
|
|
895
|
+
cleaned = cleaned.gsub(/^I EEE /, "IEEE ")
|
|
896
|
+
|
|
897
|
+
# Fix "lEEE" (lowercase L instead of I)
|
|
898
|
+
cleaned = cleaned.gsub(/^lEEE /, "IEEE ")
|
|
899
|
+
|
|
900
|
+
# Fix missing closing parenthesis at end only (very conservative)
|
|
901
|
+
# Only if there's exactly one more opening than closing paren
|
|
902
|
+
open_count = cleaned.count("(")
|
|
903
|
+
close_count = cleaned.count(")")
|
|
904
|
+
if open_count == close_count + 1 && !cleaned.end_with?(")")
|
|
905
|
+
cleaned = "#{cleaned})"
|
|
906
|
+
end
|
|
907
|
+
|
|
908
|
+
# NEW Phase 1: Remove trailing commas/colons and text
|
|
909
|
+
cleaned = cleaned.gsub(/,\s*Standard\s*$/, "") # ", Standard" at end
|
|
910
|
+
cleaned = cleaned.gsub(/[,:]\s*$/, "") # Trailing comma/colon
|
|
911
|
+
cleaned = cleaned.gsub(/,\s+and\s+IEEE\s+Std\s/, " and ") # Handle "IEEE Std and Std" case
|
|
912
|
+
|
|
913
|
+
# Enhanced: Fix unbalanced parentheses comprehensively
|
|
914
|
+
# Handle three cases: missing closing, extra opening, nested unbalanced
|
|
915
|
+
open_count = cleaned.count("(")
|
|
916
|
+
close_count = cleaned.count(")")
|
|
917
|
+
|
|
918
|
+
if open_count > close_count
|
|
919
|
+
# More opening than closing - add closing parens at end
|
|
920
|
+
# This handles both simple missing and nested unbalanced cases
|
|
921
|
+
missing = open_count - close_count
|
|
922
|
+
cleaned = cleaned + (")" * missing)
|
|
923
|
+
elsif close_count > open_count
|
|
924
|
+
# More closing than opening - remove extra closing from end
|
|
925
|
+
# Very conservative: only remove trailing excess closing parens
|
|
926
|
+
extra = close_count - open_count
|
|
927
|
+
cleaned = cleaned.sub(/\){#{extra}}$/, "")
|
|
928
|
+
end
|
|
929
|
+
|
|
930
|
+
# === SESSION 173: TODO.IEEE-MUST-DO.txt Preprocessing Enhancements ===
|
|
931
|
+
|
|
932
|
+
# Part A: Simple Normalizations (Lines 13, 16, 32-35, 36, 39-41 from TODO)
|
|
933
|
+
|
|
934
|
+
# 1. Missing dash before year: "802.16g 2007" → "802.16g-2007"
|
|
935
|
+
# But be careful not to affect month names (already have space)
|
|
936
|
+
# Only apply if: digit + space + 4-digit year (and not after a month name)
|
|
937
|
+
cleaned = cleaned.gsub(/(\d)\s+(\d{4})(?=\s*\(|\s*$)/, '\1-\2')
|
|
938
|
+
|
|
939
|
+
# 2. Space-dash-space before year: "802.1ag - 2007" → "802.1ag-2007"
|
|
940
|
+
# This is distinct from " - " in titles, targets space-dash-space-year pattern
|
|
941
|
+
cleaned = cleaned.gsub(/\s+-\s+(\d{4})\b/, '-\1')
|
|
942
|
+
|
|
943
|
+
# 3. Add missing "Std" after IEEE: "IEEE 1070-1995" → "IEEE Std 1070-1995"
|
|
944
|
+
# Only at start of string, IEEE + space + digit
|
|
945
|
+
cleaned = cleaned.gsub(/^IEEE\s+(?!Std\b)(\d)/, 'IEEE Std \1')
|
|
946
|
+
|
|
947
|
+
# 3.5. Convert "IEEE No." to "IEEE Std": "IEEE No. 264-1968" → "IEEE Std 264-1968"
|
|
948
|
+
# NOTE: Do NOT convert AIEE No - AIEE uses "No" as standard format
|
|
949
|
+
cleaned = cleaned.gsub(/^IEEE\s+No\.\s*/, "IEEE Std ")
|
|
950
|
+
cleaned = cleaned.gsub(/^IEEE\s+No\s/, "IEEE Std ")
|
|
951
|
+
# Skip AIEE No conversion - AIEE preserves "No" format
|
|
952
|
+
|
|
953
|
+
# 4. Space before slash in dual published: "262-1973 /ANSI" → "262-1973/ANSI"
|
|
954
|
+
cleaned = cleaned.gsub(/\s+\//, "/")
|
|
955
|
+
|
|
956
|
+
# 5. Comma before Edition: ", 1998 Edition" → "-1998"
|
|
957
|
+
# Normalize to standard year format for parser
|
|
958
|
+
cleaned = cleaned.gsub(/,\s+(\d{4})\s+Edition/, '-\1')
|
|
959
|
+
|
|
960
|
+
# 6. ISO/IEC spacing: "ISO/IEC15802" → "ISO/IEC 15802"
|
|
961
|
+
# Add space between publisher prefix and number
|
|
962
|
+
cleaned = cleaned.gsub(/(ISO\/IEC)(\d)/, '\1 \2')
|
|
963
|
+
|
|
964
|
+
# Part B: Publisher Order (Line 38 from TODO)
|
|
965
|
+
|
|
966
|
+
# Fix wrong publisher order: "IEEE Std ANSI/IEEE" → "ANSI/IEEE Std"
|
|
967
|
+
# This handles cases where IEEE Std appears before ANSI/IEEE publisher
|
|
968
|
+
cleaned = cleaned.gsub(/^IEEE\s+Std\s+(ANSI\/IEEE)/, '\1 Std')
|
|
969
|
+
|
|
970
|
+
# Part C: Dual Published Formats (Lines 8, 19 from TODO)
|
|
971
|
+
|
|
972
|
+
# 1. Semicolon to parenthetical for dual published (MultiLabeledIdentifier)
|
|
973
|
+
# "IEEE Std 120-1955; ASME PTC 19.6-1955" → "IEEE Std 120-1955 (ASME PTC 19.6-1955)"
|
|
974
|
+
# Only if semicolon + space + organization abbreviation (capital letters)
|
|
975
|
+
if cleaned.match?(/;\s+[A-Z]{2,}/)
|
|
976
|
+
cleaned = cleaned.sub(/;\s+([A-Z][^;]+)$/, ' (\1)')
|
|
977
|
+
end
|
|
978
|
+
|
|
979
|
+
# === SESSION 174: Additional TODO.IEEE-MUST-DO.txt Preprocessing ===
|
|
980
|
+
|
|
981
|
+
# Part A: Edition Abbreviation Normalization (Lines 10-11)
|
|
982
|
+
# Pattern: ", 1999 Edn. (Reaff 2003)" → "-1999 (R2003)"
|
|
983
|
+
# Normalize both the Edition abbreviation and the Reaffirmed format
|
|
984
|
+
cleaned = cleaned.gsub(/,\s+(\d{4})\s+Edn\.\s+\(Reaff\s+(\d{4})\)/,
|
|
985
|
+
'-\1 (R\2)')
|
|
986
|
+
# Also handle without initial comma (might occur in relationships)
|
|
987
|
+
cleaned = cleaned.gsub(/(\d{4})\s+Edn\.\s+\(Reaff\s+(\d{4})\)/,
|
|
988
|
+
'\1 (R\2)')
|
|
989
|
+
|
|
990
|
+
# Part B: IRE Parenthetical Split (Line 9)
|
|
991
|
+
# Pattern: "(Reaffirmed 1980, 56 IRE 28.S2)" → "(R1980) (56 IRE 28.S2)"
|
|
992
|
+
# Split nested reaffirmation + IRE reference into two parentheticals
|
|
993
|
+
cleaned = cleaned.gsub(/\(Reaffirmed\s+(\d{4}),\s+(\d+\s+IRE[^)]+)\)/,
|
|
994
|
+
'(R\1) (\2)')
|
|
995
|
+
|
|
996
|
+
# Part C: Slash to Parenthetical (Line 37)
|
|
997
|
+
# Pattern: "number-year/ANSI identifier" → "number-year (ANSI identifier)"
|
|
998
|
+
# Only convert if slash is followed by ANSI and NOT a relationship keyword
|
|
999
|
+
# Look ahead to ensure we're at end of main identifier (before paren or end of string)
|
|
1000
|
+
cleaned = cleaned.gsub(%r{(\d{4})/ANSI\s+([^(]+)(?=\s*\(|$)},
|
|
1001
|
+
'\1 (ANSI \2)')
|
|
1002
|
+
|
|
1003
|
+
# Part D: ISO/IEC TR Spacing (Line 40)
|
|
1004
|
+
# Pattern: "ISO/IEC TR11802" → "ISO/IEC TR 11802"
|
|
1005
|
+
# Add space after TR when directly followed by digit
|
|
1006
|
+
cleaned = cleaned.gsub(/(ISO\/IEC\s+TR)(\d)/, '\1 \2')
|
|
1007
|
+
# === SESSION 178: AIEE Dual Numbers Expansion (Line 45) ===
|
|
1008
|
+
|
|
1009
|
+
# Part E: AIEE "Nos X and Y" Expansion
|
|
1010
|
+
# Pattern: "AIEE Nos 72 and 73 - 1932" → "AIEE No 72-1932 and AIEE No 73-1932"
|
|
1011
|
+
# Expands dual AIEE numbers to separate identifiers with shared year
|
|
1012
|
+
if cleaned.match?(/AIEE\s+Nos\s+(\d+)\s+and\s+(\d+)\s+-\s+(\d{4})/)
|
|
1013
|
+
cleaned = cleaned.sub(/AIEE\s+Nos\s+(\d+)\s+and\s+(\d+)\s+-\s+(\d{4})/) do
|
|
1014
|
+
first_num = $1
|
|
1015
|
+
second_num = $2
|
|
1016
|
+
year = $3
|
|
1017
|
+
"AIEE No #{first_num}-#{year} and AIEE No #{second_num}-#{year}"
|
|
1018
|
+
end
|
|
1019
|
+
end
|
|
1020
|
+
|
|
1021
|
+
# === SESSION 222: TODO.IEEE-MUST-FIX-IDs.txt Comprehensive Fixes ===
|
|
1022
|
+
|
|
1023
|
+
# Part A: Typo Fixes
|
|
1024
|
+
# 1. "Stad" -> "Std" (typo)
|
|
1025
|
+
cleaned = cleaned.gsub(/\bStad\b/, "Std")
|
|
1026
|
+
|
|
1027
|
+
# 2. Lowercase "std" -> "Std" when after IEEE/ANSI publishers
|
|
1028
|
+
cleaned = cleaned.gsub(/\b(IEEE|ANSI|AIEE)\s+std\b/, '\1 Std')
|
|
1029
|
+
|
|
1030
|
+
# Part B: Symbol Normalization
|
|
1031
|
+
# 3. Additional (TM) patterns - strip them out
|
|
1032
|
+
cleaned = cleaned.gsub("(TM)", "")
|
|
1033
|
+
|
|
1034
|
+
# Part C: Year-first format normalization
|
|
1035
|
+
# 4. Pattern "62704-4/D4, 2020" -> "IEEE P62704-4/D4, 2020"
|
|
1036
|
+
# Only if starts with digits-dash-digits/D pattern
|
|
1037
|
+
if cleaned.match?(/^(\d+[-.]\d+)\/D\d+/)
|
|
1038
|
+
cleaned = "IEEE P#{cleaned}"
|
|
1039
|
+
end
|
|
1040
|
+
|
|
1041
|
+
# Part D: Suffix Normalization
|
|
1042
|
+
# 5. "/Preprint" -> remove (data quality - not standard suffix)
|
|
1043
|
+
cleaned = cleaned.gsub(/\/Preprint\b/, "")
|
|
1044
|
+
|
|
1045
|
+
# Part E: Relationship Text Normalization
|
|
1046
|
+
# 6. "Proposed Revision of" -> "Revision of"
|
|
1047
|
+
cleaned = cleaned.gsub("Proposed Revision of", "Revision of")
|
|
1048
|
+
|
|
1049
|
+
# 7. "ammended" typo -> "amended"
|
|
1050
|
+
cleaned = cleaned.gsub(/\bammended\b/i, "amended")
|
|
1051
|
+
|
|
1052
|
+
# Part F: Trailing Characters After Special Patterns
|
|
1053
|
+
# 8. Remove trailing periods after /INT, /Cor, etc.
|
|
1054
|
+
cleaned = cleaned.gsub(/(\/INT|\/Cor\s+\d+-\d{4})\./, '\1')
|
|
1055
|
+
|
|
1056
|
+
# Part G: Conformance Pattern Spacing
|
|
1057
|
+
# 9. Fix spacing in "/Conformance" patterns WITHOUT year (malformed only)
|
|
1058
|
+
# "1904.1(TM)/Conformance02" -> "1904.1 /Conformance02" (space before slash)
|
|
1059
|
+
# BUT: DO NOT touch valid patterns like "802.16/Conformance01-2003" (with year)
|
|
1060
|
+
# Use positive check for year suffix to exclude valid patterns
|
|
1061
|
+
# Actually, this preprocessing is breaking valid patterns - just remove it entirely
|
|
1062
|
+
# The parser can handle both "6/Conformance01-2003" and "6 /Conformance02" formats
|
|
1063
|
+
|
|
1064
|
+
# Part H: Edition Text After /INT
|
|
1065
|
+
# 10. Handle ", Month YYYY Edition" after /INT by converting to month-year format
|
|
1066
|
+
# "1003.1/INT, March 1994 Edition" -> "1003.1/INT, March 1994"
|
|
1067
|
+
cleaned = cleaned.gsub(/(\/INT),\s+([A-Z][a-z]+)\s+(\d{4})\s+Edition/,
|
|
1068
|
+
'\1, \2 \3')
|
|
1069
|
+
|
|
1070
|
+
# Part I: Handle "Ed." abbreviation
|
|
1071
|
+
# 11. "Dec. 1994 Ed." -> "Dec. 1994"
|
|
1072
|
+
cleaned = cleaned.gsub(/\s+Ed\.\s*$/, "")
|
|
1073
|
+
|
|
1074
|
+
# === PHASE 2: High-impact preprocessing for fixture failures ===
|
|
1075
|
+
|
|
1076
|
+
# Quick wins from SESSION 224 (must come before more complex fixes)
|
|
1077
|
+
|
|
1078
|
+
# Remove period after "Std": "IEEE Std." -> "IEEE Std"
|
|
1079
|
+
cleaned = cleaned.gsub(/\bStd\.\s+/, "Std ")
|
|
1080
|
+
|
|
1081
|
+
# Redline Suffix Removal: " - Redline" at end
|
|
1082
|
+
cleaned = cleaned.gsub(/\s+-\s+Redline\b.*$/, "")
|
|
1083
|
+
|
|
1084
|
+
# Title portion removal after year: "YYYY - IEEE Standard for..."
|
|
1085
|
+
cleaned = cleaned.gsub(
|
|
1086
|
+
/(\d{4})(\s+\([^)]+\))?\s+-\s+IEEE\s+Standard\s+for.*$/, '\1\2'
|
|
1087
|
+
)
|
|
1088
|
+
|
|
1089
|
+
# Fix 2A: "IEEE PC" prefix -> "IEEE Std PC" or "IEEE P" treatment
|
|
1090
|
+
# "IEEE PC37.20.9/D7.3A" -> needs to parse as IEEE project draft
|
|
1091
|
+
# Strategy: Add "Std" after "IEEE" when followed by "PC" to route to standard pattern
|
|
1092
|
+
# Actually, the issue is the number rule consumes "PC37" as P + C37.
|
|
1093
|
+
# Better: normalize "IEEE PC" to "IEEE Std PC" so it hits the standard identifier path
|
|
1094
|
+
cleaned = cleaned.gsub(/^IEEE\s+PC(\d)/, 'IEEE Std PC\1')
|
|
1095
|
+
cleaned = cleaned.gsub(/^IEEE\s+Unapproved\s+Draft\s+Std\s+PC(\d)/,
|
|
1096
|
+
'IEEE Unapproved Draft Std PC\1')
|
|
1097
|
+
|
|
1098
|
+
# Fix 2B: "IEEE P" without "Std"/"Draft" prefix
|
|
1099
|
+
# ieee_p_identifier rule handles these directly - no preprocessing needed
|
|
1100
|
+
# Only handle "IEEE P" followed by "and ASHRAE" (copub case)
|
|
1101
|
+
cleaned = cleaned.gsub(/^IEEE\s+P(\d+)\s+and\s+ASHRAE/,
|
|
1102
|
+
'IEEE Std P\1 and ASHRAE')
|
|
1103
|
+
|
|
1104
|
+
# Fix 2C: "ISO/IEC XXXX-YYYY: Title" -> strip title after colon for ISO/IEC published standards
|
|
1105
|
+
# These are ISO-format identifiers with IEEE adoption, strip the title
|
|
1106
|
+
cleaned = cleaned.gsub(/^(ISO\/IEC \d+[-.]\d+-\d{4}):.*$/, '\1')
|
|
1107
|
+
cleaned = cleaned.gsub(/^(ISO\/IEC \d+-\d{4}):.*$/, '\1')
|
|
1108
|
+
|
|
1109
|
+
# Fix 2D: "ISO/IEC XXXX : YYYY" -> normalize spacing around colon
|
|
1110
|
+
cleaned = cleaned.gsub(/^(ISO\/IEC \d+[-.]\d*)\s*:\s*(\d{4})/, '\1:\2')
|
|
1111
|
+
cleaned = cleaned.gsub(/^(ISO\/IEC \d+)\s*:\s*(\d{4})/, '\1:\2')
|
|
1112
|
+
|
|
1113
|
+
# Fix 2G: "IEC/IEEE PXXX_D5" -> underscore to slash
|
|
1114
|
+
cleaned = cleaned.gsub(/^(IEC\/IEEE P[\w.-]+)_D/, '\1/D')
|
|
1115
|
+
|
|
1116
|
+
# Fix 2H: "IEC XXXX First edition YYYY-MM; IEEE NNNN" -> normalize semicolon
|
|
1117
|
+
# Already handled by earlier semicolon normalization
|
|
1118
|
+
|
|
1119
|
+
# Fix 2I: "IEEE/ISO/IEC PXXX/DIS" -> normalize to "ISO/IEC/IEEE PXXX/DIS"
|
|
1120
|
+
cleaned = cleaned.gsub(/^IEEE\/ISO\/IEC\s+(P[\w.-]+)/,
|
|
1121
|
+
'ISO/IEC/IEEE \1')
|
|
1122
|
+
cleaned = cleaned.gsub(/^IEEE\/IEC\/ISO\s+(P[\w.-]+)/,
|
|
1123
|
+
'IEC/ISO/IEEE \1')
|
|
1124
|
+
|
|
1125
|
+
# Fix 2J: "IEEE/IEC PXXX D5" -> normalize space to slash before D
|
|
1126
|
+
cleaned = cleaned.gsub(/^(IEEE\/IEC P[\w.-]+)\s+D(\d)/, '\1/D\2')
|
|
1127
|
+
cleaned = cleaned.gsub(
|
|
1128
|
+
/^(IEEE\/IEC P[\w.-]+)\s+(CDV|FDIS|CD|DIS|ED\d)/, '\1/\2'
|
|
1129
|
+
)
|
|
1130
|
+
|
|
1131
|
+
# Fix 2K: "ISO /IEC/IEEE" -> fix space before slash
|
|
1132
|
+
cleaned = cleaned.gsub(/^ISO\s+\/IEC\/IEEE/, "ISO/IEC/IEEE")
|
|
1133
|
+
cleaned = cleaned.gsub(/^ISO\s+\/IEC/, "ISO/IEC")
|
|
1134
|
+
|
|
1135
|
+
# Fix 2L: "IS0" typo (letter O instead of digit 0)
|
|
1136
|
+
cleaned = cleaned.gsub(/^IS0\//, "ISO/")
|
|
1137
|
+
|
|
1138
|
+
# Fix 2M: "IEEE-P15026-3-DIS-January 2015" -> dash-separated format
|
|
1139
|
+
# Normalize to "ISO/IEC/IEEE P15026-3/DIS, January 2015"
|
|
1140
|
+
cleaned = cleaned.gsub(/^IEEE-P(\d+)-(\d+)-DIS-(.*)/,
|
|
1141
|
+
'ISO/IEC/IEEE P\1-\2/DIS, \3')
|
|
1142
|
+
|
|
1143
|
+
# Fix 2N: "IEEE/CSA P844.1/293.1/D2" -> normalize CSA dual numbering
|
|
1144
|
+
cleaned = cleaned.gsub(/^IEEE\/CSA\s+(P[\d.]+)\/([\d.]+)\/D(\d+)/,
|
|
1145
|
+
'IEEE/CSA \1/D\3')
|
|
1146
|
+
|
|
1147
|
+
# Fix 2O: "IEEE Approved Draft Std P" -> normalize spacing
|
|
1148
|
+
cleaned = cleaned.gsub(/^IEEE\s+Approved\s+Draft\s+Std\s+(P\d)/,
|
|
1149
|
+
'IEEE Approved Draft Std \1')
|
|
1150
|
+
# Fix: "IEEE Approved Draft Std P1234 / D12" -> remove space before slash
|
|
1151
|
+
cleaned = cleaned.gsub(/^(IEEE Approved Draft Std P[\w.-]+)\s+\/\s*D/,
|
|
1152
|
+
'\1/D')
|
|
1153
|
+
|
|
1154
|
+
# Fix 2P: "IEEE/EIA" -> normalize (parser handles IEEE/EIA via copublisher)
|
|
1155
|
+
# Already works - no fix needed
|
|
1156
|
+
|
|
1157
|
+
# Fix 2Q: AIEE format variations
|
|
1158
|
+
# "AIEE No.1C-1954" -> "AIEE No. 1C-1954" (add space after No.)
|
|
1159
|
+
cleaned = cleaned.gsub(/^AIEE\s+No\.\s*(\d)/, 'AIEE No. \1')
|
|
1160
|
+
# "AIEE no 700-1945" -> "AIEE No 700-1945" (capitalize)
|
|
1161
|
+
cleaned = cleaned.gsub(/^AIEE\s+no\s/, "AIEE No ")
|
|
1162
|
+
# "AIEE Std No. 800" -> "AIEE Standard No 800" (normalize type word)
|
|
1163
|
+
cleaned = cleaned.gsub(/^AIEE\s+Std\s+No\.\s*/, "AIEE Standard No ")
|
|
1164
|
+
# "AIEE No 750.1-1960" -> handled by AIEE parser if decimal support added
|
|
1165
|
+
|
|
1166
|
+
# Fix 2R: "IEEE PSI 10/D2" -> normalize to "IEEE/ASTM PSI 10/D2"
|
|
1167
|
+
cleaned = cleaned.gsub(/^IEEE\s+PSI\s+(\d)/, 'IEEE/ASTM PSI \1')
|
|
1168
|
+
|
|
1169
|
+
# Fix 2S: "IEEE/IEC P62271-111/PC37.60_D5" -> normalize
|
|
1170
|
+
cleaned = cleaned.gsub(/^(IEEE\/IEC P[\d.-]+\/PC[\d.]+)_D/, '\1/D')
|
|
1171
|
+
|
|
1172
|
+
# Fix 2T: "IEC P62271-111/IEEE PC37.60_D5" -> normalize to IEC/IEEE format
|
|
1173
|
+
cleaned = cleaned.gsub(/^IEC\s+(P[\d.-]+)\/IEEE\s+(PC[\d.]+)_D/,
|
|
1174
|
+
'IEC/IEEE \2/D')
|
|
1175
|
+
|
|
1176
|
+
# Fix 2U: "IEC/IEC P" -> "IEC/IEEE P" (typo)
|
|
1177
|
+
cleaned = cleaned.gsub(/^IEC\/IEC\s+(P\d)/, 'IEC/IEEE \1')
|
|
1178
|
+
|
|
1179
|
+
# Fix 2V: "NACE SPXXXX-YYYY/IEEE Std NNNN-YYYY" -> normalize slash to parenthetical
|
|
1180
|
+
cleaned = cleaned.gsub(/^(NACE\s+SP\d+-\d+)\/(IEEE\s+Std\s+\d+-\d+)$/,
|
|
1181
|
+
'\1 (\2)')
|
|
1182
|
+
|
|
1183
|
+
# Fix 2W: "IEEE Std 802.11g-2003 (Amendment to IEEE Std 802.11, 1999 Edn. (Reaff 2003) as amended by"
|
|
1184
|
+
# This is a complex relationship - strip the parenthetical if too complex
|
|
1185
|
+
# Let the parser handle it but fix "Edn." to "Edition"
|
|
1186
|
+
cleaned = cleaned.gsub("Edn.", "Edition")
|
|
1187
|
+
|
|
1188
|
+
# Fix 2X: "IEEE-P15026-3-DIS" format -> normalize
|
|
1189
|
+
# Already handled by Fix 2M
|
|
1190
|
+
|
|
1191
|
+
# Fix 2Y: "P1635/D10/ASHARE 21/D10" -> fix ASHARE typo to ASHRAE
|
|
1192
|
+
cleaned = cleaned.gsub("ASHARE", "ASHRAE")
|
|
1193
|
+
|
|
1194
|
+
# Fix 2Z: "PC37.30.2/D043 Rev 18" -> normalize draft version with Rev
|
|
1195
|
+
# "PC57-15 D2.0" -> normalize to "P57-15/D2.0"
|
|
1196
|
+
cleaned = cleaned.gsub(/^PC(\d)/, 'P\1')
|
|
1197
|
+
|
|
1198
|
+
# Fix 2AA: "IEEE/ISO/IEC 8802-1Q-2020/Amd31-2021" -> normalize
|
|
1199
|
+
cleaned = cleaned.gsub(/^IEEE\/ISO\/IEC\s+(8802[\w.-]+)/,
|
|
1200
|
+
'ISO/IEC/IEEE \1')
|
|
1201
|
+
|
|
1202
|
+
# Fix 2AB: "IEEE C57.139/D14June 2010" -> add missing space
|
|
1203
|
+
cleaned = cleaned.gsub(
|
|
1204
|
+
/^(IEEE\s+C?\d[\d.]*\/D\d+)([A-Z][a-z]+\s+\d{4})/, '\1, \2'
|
|
1205
|
+
)
|
|
1206
|
+
|
|
1207
|
+
# Fix 2AC: "IEEE Std: Title" -> strip colon and title (ANSI/IEEE Std: )
|
|
1208
|
+
cleaned = cleaned.gsub(/^(ANSI\/IEEE Std):\s+.*$/, '\1')
|
|
1209
|
+
|
|
1210
|
+
# Fix 2AD: "IEEE 1076 IEC 61691-1-1 First edition 2004-10" -> semicolon format
|
|
1211
|
+
cleaned = cleaned.gsub(
|
|
1212
|
+
/^(IEEE\s+[\d.]+)\s+(IEC\s+\d+[-\d]*\s+.*edition\s+\d{4}-\d{2})$/i, '\1; \2'
|
|
1213
|
+
)
|
|
1214
|
+
|
|
1215
|
+
# Fix 2AE: "IEEE No 29-1941 / ASA C77.1-1943" -> normalize to IEEE Std format
|
|
1216
|
+
cleaned = cleaned.gsub(/^IEEE\s+No\s+(\d+-\d+)\s+\/\s+ASA\s+(.*)/,
|
|
1217
|
+
'IEEE Std \1 (ASA \2)')
|
|
1218
|
+
|
|
1219
|
+
# Fix 2AF: "IEEE Std 1003.1/2003.l/lNT" -> fix typos
|
|
1220
|
+
# .l -> .1 and lNT -> INT handled by existing fixes
|
|
1221
|
+
|
|
1222
|
+
new.parse(cleaned)
|
|
1223
|
+
end
|
|
1224
|
+
end
|
|
1225
|
+
end
|
|
1226
|
+
end
|