atlas_engine 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +123 -0
- data/Rakefile +20 -0
- data/app/assets/config/atlas_engine_manifest.js +3 -0
- data/app/assets/stylesheets/atlas_engine/application.css +15 -0
- data/app/concerns/atlas_engine/handles_blob.rb +26 -0
- data/app/concerns/atlas_engine/handles_interruption.rb +22 -0
- data/app/controllers/atlas_engine/application_controller.rb +7 -0
- data/app/controllers/atlas_engine/connectivity_controller.rb +21 -0
- data/app/controllers/atlas_engine/country_imports_controller.rb +73 -0
- data/app/controllers/atlas_engine/graphql_controller.rb +59 -0
- data/app/countries/atlas_engine/ar/country_profile.yml +9 -0
- data/app/countries/atlas_engine/at/address_importer/corrections/open_address/city_corrector.rb +23 -0
- data/app/countries/atlas_engine/at/country_profile.yml +24 -0
- data/app/countries/atlas_engine/at/index_configuration.yml +63 -0
- data/app/countries/atlas_engine/at/synonyms.yml +6 -0
- data/app/countries/atlas_engine/at/validation_transcriber/address_parser.rb +58 -0
- data/app/countries/atlas_engine/au/address_importer/open_address/filter.rb +26 -0
- data/app/countries/atlas_engine/au/address_importer/open_address/mapper.rb +41 -0
- data/app/countries/atlas_engine/au/country_profile.yml +13 -0
- data/app/countries/atlas_engine/au/synonyms.yml +209 -0
- data/app/countries/atlas_engine/au/validation_transcriber/address_parser.rb +121 -0
- data/app/countries/atlas_engine/be/country_profile.yml +12 -0
- data/app/countries/atlas_engine/bm/address_importer/corrections/open_address/city_alias_corrector.rb +38 -0
- data/app/countries/atlas_engine/bm/address_importer/open_address/mapper.rb +40 -0
- data/app/countries/atlas_engine/bm/country_profile.yml +12 -0
- data/app/countries/atlas_engine/br/country_profile.yml +4 -0
- data/app/countries/atlas_engine/ca/country_profile.yml +7 -0
- data/app/countries/atlas_engine/ca/synonyms.yml +1615 -0
- data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/city_corrector.rb +29 -0
- data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/locale_corrector.rb +74 -0
- data/app/countries/atlas_engine/ch/address_importer/open_address/mapper.rb +40 -0
- data/app/countries/atlas_engine/ch/country_profile.yml +15 -0
- data/app/countries/atlas_engine/ch/locales/de/country_profile.yml +15 -0
- data/app/countries/atlas_engine/ch/locales/de/index_configuration.yml +63 -0
- data/app/countries/atlas_engine/ch/locales/de/synonyms.yml +7 -0
- data/app/countries/atlas_engine/ch/locales/fr/synonyms.yml +21 -0
- data/app/countries/atlas_engine/cz/country_profile.yml +6 -0
- data/app/countries/atlas_engine/de/country_profile.yml +19 -0
- data/app/countries/atlas_engine/de/index_configuration.yml +64 -0
- data/app/countries/atlas_engine/de/synonyms.yml +2 -0
- data/app/countries/atlas_engine/de/validation_transcriber/address_parser.rb +19 -0
- data/app/countries/atlas_engine/dk/country_profile.yml +6 -0
- data/app/countries/atlas_engine/dk/synonyms.yml +3 -0
- data/app/countries/atlas_engine/dk/validation_transcriber/address_parser.rb +21 -0
- data/app/countries/atlas_engine/fo/country_profile.yml +5 -0
- data/app/countries/atlas_engine/fr/address_importer/corrections/open_address/city_corrector.rb +28 -0
- data/app/countries/atlas_engine/fr/country_profile.yml +13 -0
- data/app/countries/atlas_engine/fr/synonyms.yml +21 -0
- data/app/countries/atlas_engine/fr/validation_transcriber/address_parser.rb +34 -0
- data/app/countries/atlas_engine/gb/address_validation/es/query_builder.rb +98 -0
- data/app/countries/atlas_engine/gb/country_profile.yml +10 -0
- data/app/countries/atlas_engine/gb/validation_transcriber/full_address_parser.rb +164 -0
- data/app/countries/atlas_engine/gb/validation_transcriber/parsed_address.rb +120 -0
- data/app/countries/atlas_engine/gg/address_validation/validators/full_address/restrictions/unsupported_city.rb +39 -0
- data/app/countries/atlas_engine/gg/country_profile.yml +7 -0
- data/app/countries/atlas_engine/ie/country_profile.yml +3 -0
- data/app/countries/atlas_engine/it/address_importer/corrections/open_address/city_corrector.rb +27 -0
- data/app/countries/atlas_engine/it/address_importer/corrections/open_address/province_corrector.rb +29 -0
- data/app/countries/atlas_engine/it/address_importer/open_address/mapper.rb +42 -0
- data/app/countries/atlas_engine/it/country_profile.yml +11 -0
- data/app/countries/atlas_engine/jp/address_validation/es/data_mapper.rb +63 -0
- data/app/countries/atlas_engine/jp/country_profile.yml +6 -0
- data/app/countries/atlas_engine/kr/address_importer/open_address/mapper.rb +41 -0
- data/app/countries/atlas_engine/kr/country_profile.yml +11 -0
- data/app/countries/atlas_engine/li/address_importer/corrections/open_address/city_corrector.rb +25 -0
- data/app/countries/atlas_engine/li/country_profile.yml +21 -0
- data/app/countries/atlas_engine/li/index_configuration.yml +63 -0
- data/app/countries/atlas_engine/li/synonyms.yml +6 -0
- data/app/countries/atlas_engine/lt/country_profile.yml +6 -0
- data/app/countries/atlas_engine/lt/synonyms.yml +7 -0
- data/app/countries/atlas_engine/lt/validation_transcriber/address_parser.rb +24 -0
- data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/locale_corrector.rb +54 -0
- data/app/countries/atlas_engine/lu/country_profile.yml +12 -0
- data/app/countries/atlas_engine/nl/address_importer/corrections/open_address/city_corrector.rb +25 -0
- data/app/countries/atlas_engine/nl/country_profile.yml +18 -0
- data/app/countries/atlas_engine/nl/index_configuration.yml +52 -0
- data/app/countries/atlas_engine/nl/synonyms.yml +92 -0
- data/app/countries/atlas_engine/nl/validation_transcriber/address_parser.rb +85 -0
- data/app/countries/atlas_engine/no/country_profile.yml +5 -0
- data/app/countries/atlas_engine/nz/country_profile.yml +3 -0
- data/app/countries/atlas_engine/pl/country_profile.yml +5 -0
- data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +19 -0
- data/app/countries/atlas_engine/pt/address_importer/corrections/open_address/city_corrector.rb +32 -0
- data/app/countries/atlas_engine/pt/address_importer/open_address/mapper.rb +39 -0
- data/app/countries/atlas_engine/pt/country_profile.yml +10 -0
- data/app/countries/atlas_engine/pt/synonyms.yml +7 -0
- data/app/countries/atlas_engine/sa/country_profile.yml +10 -0
- data/app/countries/atlas_engine/se/country_profile.yml +5 -0
- data/app/countries/atlas_engine/tt/address_importer/open_address/mapper.rb +38 -0
- data/app/countries/atlas_engine/tt/country_profile.yml +7 -0
- data/app/countries/atlas_engine/us/country_profile.yml +12 -0
- data/app/countries/atlas_engine/us/synonyms.yml +350 -0
- data/app/graphql/atlas_engine/errors/locale_unsupported_error.rb +17 -0
- data/app/graphql/atlas_engine/schema.graphql +1293 -0
- data/app/graphql/atlas_engine/schema.rb +23 -0
- data/app/graphql/atlas_engine/types/address_validation/address_input.rb +51 -0
- data/app/graphql/atlas_engine/types/address_validation/concern_type.rb +20 -0
- data/app/graphql/atlas_engine/types/address_validation/enums/concern_enum.rb +15 -0
- data/app/graphql/atlas_engine/types/address_validation/field_type.rb +15 -0
- data/app/graphql/atlas_engine/types/address_validation/suggestion_type.rb +21 -0
- data/app/graphql/atlas_engine/types/base_argument.rb +9 -0
- data/app/graphql/atlas_engine/types/base_enum.rb +9 -0
- data/app/graphql/atlas_engine/types/base_field.rb +10 -0
- data/app/graphql/atlas_engine/types/base_input_object.rb +9 -0
- data/app/graphql/atlas_engine/types/base_interface.rb +10 -0
- data/app/graphql/atlas_engine/types/base_object.rb +9 -0
- data/app/graphql/atlas_engine/types/base_scalar.rb +9 -0
- data/app/graphql/atlas_engine/types/base_union.rb +9 -0
- data/app/graphql/atlas_engine/types/matching_strategy_type.rb +12 -0
- data/app/graphql/atlas_engine/types/mutation_type.rb +9 -0
- data/app/graphql/atlas_engine/types/query_type.rb +61 -0
- data/app/graphql/atlas_engine/types/validation_supported_country.rb +12 -0
- data/app/graphql/atlas_engine/types/validation_type.rb +22 -0
- data/app/helpers/atlas_engine/address_importer/import_log_helper.rb +66 -0
- data/app/helpers/atlas_engine/application_helper.rb +7 -0
- data/app/helpers/atlas_engine/locale_format_helper.rb +40 -0
- data/app/helpers/atlas_engine/log_base.rb +32 -0
- data/app/helpers/atlas_engine/log_helper.rb +24 -0
- data/app/helpers/atlas_engine/metrics_helper.rb +25 -0
- data/app/jobs/atlas_engine/address_importer/clear_records_job.rb +39 -0
- data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_job.rb +212 -0
- data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_launcher_job.rb +67 -0
- data/app/jobs/atlas_engine/address_importer/open_address/prepares_geo_json_file.rb +41 -0
- data/app/jobs/atlas_engine/address_importer/resumable_import_job.rb +49 -0
- data/app/jobs/atlas_engine/address_importer/street_backfill_job.rb +63 -0
- data/app/jobs/atlas_engine/application_job.rb +10 -0
- data/app/jobs/atlas_engine/concerns/address_importer/handles_errors.rb +43 -0
- data/app/lib/atlas_engine/concern_formatter.rb +40 -0
- data/app/lib/atlas_engine/restrictions/base.rb +20 -0
- data/app/lib/atlas_engine/restrictions/unsupported_script.rb +31 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +201 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_factory.rb +27 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_north_america.rb +39 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_oceanic.rb +17 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_preprocessor.rb +132 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parsing_helper.rb +38 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parsings.rb +54 -0
- data/app/lib/atlas_engine/validation_transcriber/constants.rb +50 -0
- data/app/lib/atlas_engine/validation_transcriber/english_street_parser.rb +59 -0
- data/app/lib/atlas_engine/validation_transcriber/formatter.rb +46 -0
- data/app/lib/atlas_engine/validation_transcriber/french_street_parser.rb +50 -0
- data/app/lib/atlas_engine/validation_transcriber/province_code_normalizer.rb +45 -0
- data/app/lib/atlas_engine/validation_transcriber/street_parser.rb +18 -0
- data/app/lib/atlas_engine/validation_transcriber/zip_normalizer.rb +23 -0
- data/app/mailers/atlas_engine/application_mailer.rb +9 -0
- data/app/models/atlas_engine/address_importer/corrections/corrector.rb +33 -0
- data/app/models/atlas_engine/address_importer/import_events_notifier/base.rb +35 -0
- data/app/models/atlas_engine/address_importer/import_events_notifier/notifier.rb +26 -0
- data/app/models/atlas_engine/address_importer/open_address/default_mapper.rb +46 -0
- data/app/models/atlas_engine/address_importer/open_address/feature_helper.rb +110 -0
- data/app/models/atlas_engine/address_importer/open_address/filter.rb +17 -0
- data/app/models/atlas_engine/address_importer/open_address/loader.rb +27 -0
- data/app/models/atlas_engine/address_importer/open_address/transformer.rb +39 -0
- data/app/models/atlas_engine/address_importer/open_address.rb +10 -0
- data/app/models/atlas_engine/address_importer/validation/base_validator.rb +86 -0
- data/app/models/atlas_engine/address_importer/validation/default_validator.rb +27 -0
- data/app/models/atlas_engine/address_importer/validation/field_validations/city.rb +47 -0
- data/app/models/atlas_engine/address_importer/validation/field_validations/interface.rb +29 -0
- data/app/models/atlas_engine/address_importer/validation/field_validations/province.rb +73 -0
- data/app/models/atlas_engine/address_importer/validation/field_validations/zip.rb +84 -0
- data/app/models/atlas_engine/address_importer/validation/validator.rb +17 -0
- data/app/models/atlas_engine/address_importer/validation/wrapper.rb +70 -0
- data/app/models/atlas_engine/address_number.rb +36 -0
- data/app/models/atlas_engine/address_number_range.rb +200 -0
- data/app/models/atlas_engine/address_validation/abstract_address.rb +49 -0
- data/app/models/atlas_engine/address_validation/address.rb +47 -0
- data/app/models/atlas_engine/address_validation/candidate.rb +109 -0
- data/app/models/atlas_engine/address_validation/candidate_tuple.rb +15 -0
- data/app/models/atlas_engine/address_validation/concern.rb +74 -0
- data/app/models/atlas_engine/address_validation/concern_producer.rb +19 -0
- data/app/models/atlas_engine/address_validation/concern_queue.rb +20 -0
- data/app/models/atlas_engine/address_validation/concern_record.rb +122 -0
- data/app/models/atlas_engine/address_validation/datastore_base.rb +27 -0
- data/app/models/atlas_engine/address_validation/errors.rb +13 -0
- data/app/models/atlas_engine/address_validation/es/candidate_selector.rb +70 -0
- data/app/models/atlas_engine/address_validation/es/data_mappers/decompounding_data_mapper.rb +39 -0
- data/app/models/atlas_engine/address_validation/es/data_mappers/default_data_mapper.rb +110 -0
- data/app/models/atlas_engine/address_validation/es/datastore.rb +229 -0
- data/app/models/atlas_engine/address_validation/es/default_query_builder.rb +30 -0
- data/app/models/atlas_engine/address_validation/es/query_builder.rb +160 -0
- data/app/models/atlas_engine/address_validation/es/term_vectors.rb +78 -0
- data/app/models/atlas_engine/address_validation/es/validators/full_address.rb +123 -0
- data/app/models/atlas_engine/address_validation/es/validators/full_address_street.rb +18 -0
- data/app/models/atlas_engine/address_validation/es/validators/restriction_evaluator.rb +37 -0
- data/app/models/atlas_engine/address_validation/field.rb +30 -0
- data/app/models/atlas_engine/address_validation/full_address_validator_base.rb +27 -0
- data/app/models/atlas_engine/address_validation/log_emitter.rb +66 -0
- data/app/models/atlas_engine/address_validation/matching_strategies.rb +16 -0
- data/app/models/atlas_engine/address_validation/normalizer.rb +38 -0
- data/app/models/atlas_engine/address_validation/predicate_pipeline.rb +80 -0
- data/app/models/atlas_engine/address_validation/request.rb +12 -0
- data/app/models/atlas_engine/address_validation/result.rb +154 -0
- data/app/models/atlas_engine/address_validation/runs_validation.rb +16 -0
- data/app/models/atlas_engine/address_validation/session.rb +47 -0
- data/app/models/atlas_engine/address_validation/statsd_emitter.rb +72 -0
- data/app/models/atlas_engine/address_validation/strategies.rb +10 -0
- data/app/models/atlas_engine/address_validation/suggestion.rb +97 -0
- data/app/models/atlas_engine/address_validation/token/comparator.rb +44 -0
- data/app/models/atlas_engine/address_validation/token/comparison.rb +76 -0
- data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +158 -0
- data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +166 -0
- data/app/models/atlas_engine/address_validation/token/sequence.rb +147 -0
- data/app/models/atlas_engine/address_validation/token/synonyms.rb +77 -0
- data/app/models/atlas_engine/address_validation/token.rb +113 -0
- data/app/models/atlas_engine/address_validation/validator.rb +147 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +97 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +164 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result_base.rb +46 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +135 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/components_to_validate.rb +88 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +127 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +23 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_concern_builder.rb +42 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_country_concern.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_province_concern.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/no_candidate_result.rb +26 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/number_comparison.rb +31 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/postal_code_matcher.rb +60 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/result_updater.rb +42 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +140 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unknown_address_concern.rb +30 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unknown_province_concern.rb +38 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unknown_zip_for_address_concern.rb +32 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unmatched_field_concern.rb +84 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unsupported_script_result.rb +22 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/cache.rb +38 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/city/present.rb +36 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/country/exists.rb +34 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/country/valid_for_zip.rb +60 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/no_emojis.rb +38 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/no_html_tags.rb +39 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/no_url.rb +38 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_length.rb +34 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_token_count.rb +63 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/phone/valid.rb +41 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/predicate.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/province/exists.rb +43 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/province/valid_for_country.rb +48 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1.rb +45 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +43 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/present.rb +35 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/present.rb +58 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_country.rb +45 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_province.rb +55 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/zip_base.rb +25 -0
- data/app/models/atlas_engine/address_validation/zip_truncator.rb +32 -0
- data/app/models/atlas_engine/application_record.rb +8 -0
- data/app/models/atlas_engine/coded_error.rb +18 -0
- data/app/models/atlas_engine/coded_errors.rb +17 -0
- data/app/models/atlas_engine/country_import.rb +44 -0
- data/app/models/atlas_engine/country_profile.rb +270 -0
- data/app/models/atlas_engine/country_profile_ingestion_subset.rb +42 -0
- data/app/models/atlas_engine/country_profile_subset_base.rb +22 -0
- data/app/models/atlas_engine/country_profile_validation_subset.rb +48 -0
- data/app/models/atlas_engine/country_repository.rb +110 -0
- data/app/models/atlas_engine/elasticsearch/client.rb +116 -0
- data/app/models/atlas_engine/elasticsearch/client_interface.rb +89 -0
- data/app/models/atlas_engine/elasticsearch/repository.rb +246 -0
- data/app/models/atlas_engine/elasticsearch/repository_interface.rb +82 -0
- data/app/models/atlas_engine/elasticsearch/response.rb +20 -0
- data/app/models/atlas_engine/event.rb +12 -0
- data/app/models/atlas_engine/field_decompounder.rb +36 -0
- data/app/models/atlas_engine/index_configuration_factory.rb +188 -0
- data/app/models/atlas_engine/post_address.rb +114 -0
- data/app/models/atlas_engine/post_address_importer.rb +34 -0
- data/app/models/atlas_engine/services/service_helper.rb +21 -0
- data/app/models/atlas_engine/services/validation.rb +65 -0
- data/app/models/atlas_engine/services/validation_eligibility.rb +18 -0
- data/app/models/atlas_engine/street.rb +34 -0
- data/app/tasks/maintenance/atlas_engine/elasticsearch_index_create_task.rb +106 -0
- data/app/tasks/maintenance/atlas_engine/geo_json_import_task.rb +29 -0
- data/app/views/atlas_engine/connectivity/index.html.erb +50 -0
- data/app/views/atlas_engine/country_imports/index.html.erb +49 -0
- data/app/views/atlas_engine/country_imports/show.html.erb +73 -0
- data/app/views/layouts/atlas_engine/application.html.erb +15 -0
- data/config/initializers/1.ruby_patches.rb +18 -0
- data/config/initializers/sorbet.rb +5 -0
- data/config/initializers/worldwide.rb +5 -0
- data/config/locales/internal/en.yml +14 -0
- data/config/routes.rb +17 -0
- data/db/data/address_synonyms/index_configurations/default.yml +141 -0
- data/db/data/country_profiles/default.yml +23 -0
- data/db/data/transcriber.yml +760 -0
- data/db/data/validation_pipelines/es.yml +58 -0
- data/db/data/validation_pipelines/es_street.yml +58 -0
- data/db/data/validation_pipelines/local.yml +60 -0
- data/db/migrate/20230919173037_create_atlas_engine_post_addresses.rb +25 -0
- data/db/migrate/20231117142735_add_building_and_unit_ranges_column.rb +7 -0
- data/db/migrate/20231117143536_create_atlas_engine_country_imports.rb +11 -0
- data/db/migrate/20231117145844_create_atlas_engine_events_table.rb +13 -0
- data/db/migrate/20231123153554_add_unique_index_to_atlas_engine_post_addresses.rb +14 -0
- data/db/migrate/20231123154658_add_index_to_post_addresses_on_source_id_locale_country_code.rb +12 -0
- data/lib/atlas_engine/engine.rb +10 -0
- data/lib/atlas_engine/version.rb +6 -0
- data/lib/atlas_engine.rb +66 -0
- data/lib/tasks/atlas_engine/address_importer.rake +20 -0
- metadata +553 -0
@@ -0,0 +1,201 @@
|
|
1
|
+
# typed: strict
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module ValidationTranscriber
|
6
|
+
# Base class for country-specific address line parsers.
#
# Subclasses supply the regular expressions to try by overriding
# #country_regex_formats; this class runs every candidate address line
# through every format, discards implausible matches, and returns the
# remaining interpretations.
class AddressParserBase
  extend T::Sig
  extend T::Helpers
  include Formatter

  AddressComponents = T.type_alias { T::Hash[Symbol, String] }

  # Note that parse() returns an array of possible interpretations.
  # This is because some address lines are ambiguous, and can be interpreted multiple ways.
  # Example 1: "123 County Road 45"
  #   - {building_num: "123", street: "County Road", unit_num: "45"}
  #   - {building_num: "123", street: "County Road 45"}
  # Example 2: "123 E 45"
  #   - {building_num: "123", street: "E", unit_num: "45"}  # 123 E Street Apt 45
  #   - {building_num: "123", street: "E 45"}  # 123 East 45th Street

  # Parts that we slot into address format regular expressions.
  # These are regex *source strings* (not Regexp objects) so that subclasses
  # can interpolate them into larger patterns.

  BUILDING_NAME = "(?<building_name>[\\w ]+)"
  BUILDING_NUM =
    "(?<building_num>("\
      '([[:digit:]]+\s)?([[:digit:]]+/[[:digit:]]+)|'\
      '[[:digit:]][[:alpha:][:digit:]/\-]*|'\
      '[[:alpha:]][[:digit:]][[:alpha:][:digit:]/\-]*'\
      "))"
  NUMERIC_ONLY_BUILDING_NUM =
    "(?<building_num>("\
      '([[:digit:]]+\s+)?[[:digit:]][[:digit:]/]*[[:digit:]]|'\
      "[[:digit:]]+"\
      "))"
  NON_NUMERIC_STREET = "(?<street>[^[:digit:]/ -].*)"
  STREET = "(?<street>.+)"
  STREET_NO_COMMAS = "(?<street>[^,]+)"
  UNIT_TYPE = '(?<unit_type>[[:alpha:]]+\.?)'
  UNIT_NUM = '(?<unit_num>[[:alpha:][:digit:]/\-]+)'
  UNIT_NUM_NO_HYPHEN = "(?<unit_num>[[:alpha:][:digit:]/]+)"
  # Matches "PO Box 123", "P.O. Box 123", "Post Office Box 123", "Box 123" and
  # "Postal Box 123" (case-insensitive); capture group 1 is the box number.
  PO_BOX = %r{(?:^|\s|/)(?:p(?:ost)?\.?\s*o(?:ffice)?\.?\s*box|box|postal\s*box)\s+(\d+)(?:$|\s)}i

  # @param address the address whose lines will be parsed; its country_code must not be blank
  # @param preprocessor optional preprocessor used to produce the candidate address lines;
  #   when omitted an AddressParserPreprocessor is built for the address
  # @raise [ArgumentError] if address.country_code is blank
  sig do
    params(address: AddressValidation::AbstractAddress, preprocessor: T.nilable(AddressParserPreprocessor)).void
  end
  def initialize(address:, preprocessor: nil)
    raise ArgumentError, "country_code cannot be blank in address" if address.country_code.blank?

    @constants = T.let(
      AtlasEngine::ValidationTranscriber::Constants.instance,
      AtlasEngine::ValidationTranscriber::Constants,
    )
    @country_regex_formats = T.let(nil, T.nilable(T::Array[Regexp]))
    @address = T.let(address, AddressValidation::TAddress)

    @preprocessor = T.let(
      preprocessor || AddressParserPreprocessor.new(address: @address),
      T.nilable(ValidationTranscriber::AddressParserPreprocessor),
    )
  end

  # Parses every address line produced by the preprocessor against every
  # format returned by #country_regex_formats.
  #
  # @return the distinct plausible interpretations; empty when the
  #   preprocessor yields no lines or nothing matches
  sig { returns(T::Array[AddressComponents]) }
  def parse
    candidates = []

    address_lines = @preprocessor&.generate_combinations

    return candidates if address_lines&.empty?

    address_lines&.each do |address_line|
      # The PO box (if any) is pulled out first so it cannot confuse the street formats.
      address_line, po_box = extract_po_box(address_line)

      country_regex_formats.each do |format|
        m = format.match(address_line)
        next if m.nil?

        captures = m.named_captures.symbolize_keys

        next if ridiculous?(captures, @address)

        captures = captures.compact_blank.transform_values! do |value|
          strip_trailing_punctuation(value)
        end
        captures[:po_box] = po_box if po_box

        candidates << captures
      end

      # Only fall back to a bare PO box interpretation when nothing has been collected so far.
      if po_box && candidates.empty?
        candidates << { po_box: po_box }
      end
    end

    candidates.uniq
  end

  private

  # Formats to try, in order. The base implementation has none; subclasses override this.
  sig { returns(T::Array[Regexp]) }
  def country_regex_formats
    []
  end

  # Splits a PO box reference out of an address line.
  #
  # @param address_line the raw address line
  # @return a two-element array: the line with every PO box reference removed
  #   (stripped), and the box number of the first match, or nil when there is none
  sig { params(address_line: String).returns(T::Array[T.nilable(String)]) }
  def extract_po_box(address_line)
    po_box_match = address_line.match(PO_BOX)
    return [address_line, nil] unless po_box_match

    # PO_BOX consumes the whitespace (or slash) on both sides of the reference, so
    # substituting a single space keeps the neighbouring words apart, e.g.
    # "123 Main St PO Box 45 Apt 2" => "123 Main St Apt 2" rather than "123 Main StApt 2".
    [address_line.gsub(PO_BOX, " ").strip, po_box_match[1]]
  end

  # Return true if something's obviously wrong with this regex match
  #
  # @param captures named captures of a format match (values may be nil)
  # @param address the address the line came from, used to confirm the captured
  #   street actually occurs in address1 or address2
  sig do
    params(
      captures: T::Hash[Symbol, T.nilable(String)],
      address: AddressValidation::AbstractAddress,
    ).returns(T::Boolean)
  end
  def ridiculous?(captures, address)
    building_num = captures[:building_num]&.downcase
    street = captures[:street]&.downcase
    unit_num = captures[:unit_num]&.downcase
    unit_type = captures[:unit_type]&.downcase
    num_street_space = captures[:num_street_space] # space between building_num and street, if present

    if street.present?
      # The captured street must appear (case-insensitively) in one of the original lines.
      return true unless address.address1&.upcase&.include?(street.upcase) ||
        address.address2&.upcase&.include?(street.upcase)
    end

    return true if [building_num, street].any? do |token|
      po_box?(token) || street_suffix?(token)
    end

    # A unit number accompanied by a recognised unit designator is accepted outright.
    return false if unit_num.present? && secondary_unit_designator?(unit_type)

    return true if [unit_num, unit_type].any? do |token|
      po_box?(token) || street_suffix?(token)
    end

    # street may legitimately be nil here (a format without a street capture);
    # street_tokens_ridiculous? accepts that and answers false.
    street_tokens_ridiculous?(
      street: street,
      unit_type: unit_type,
      unit_num: unit_num,
      num_street_space: num_street_space,
    )
  end

  # True when the token, taken as a whole, is a PO box label such as
  # "po box", "p.o. box" or "post office box". The patterns are
  # case-sensitive; #ridiculous? passes downcased tokens.
  sig { params(token: T.nilable(String)).returns(T::Boolean) }
  def po_box?(token)
    return false if token.blank?

    token.match?(/^\s*p\.?\s*o\.?\s*box\s*$/) ||
      token.match?(/^\s*post\s*office\s*box\s*$/)
  end

  # Delegates to Constants#known? for the :secondary_unit_designators list.
  sig { params(token: T.nilable(String)).returns(T::Boolean) }
  def secondary_unit_designator?(token)
    @constants.known?(:secondary_unit_designators, token)
  end

  # Delegates to Constants#known? for the :street_suffixes list.
  sig { params(token: T.nilable(String)).returns(T::Boolean) }
  def street_suffix?(token)
    @constants.known?(:street_suffixes, token)
  end

  # Token-level sanity checks on a captured street and its unit parts.
  #
  # @param street captured street (downcased by the caller); blank means nothing to check
  # @param unit_type captured unit designator, if any
  # @param unit_num captured unit number, if any
  # @param num_street_space captured whitespace between building number and street;
  #   nil when the format matched with no separator
  # @return true when the split looks wrong
  sig do
    params(
      street: T.nilable(String),
      unit_type: T.nilable(String),
      unit_num: T.nilable(String),
      num_street_space: T.nilable(String),
    )
      .returns(T::Boolean)
  end
  def street_tokens_ridiculous?(street:, unit_type:, unit_num:, num_street_space:)
    return false if street.blank?

    street_tokens = street.to_s.split(" ")
    # A street whose last or second-to-last token is a unit designator, without the
    # last token also being a street suffix, has probably swallowed the unit.
    return true if secondary_unit_designator?(street_tokens[-1]) && !street_suffix?(street_tokens[-1])
    return true if secondary_unit_designator?(street_tokens[-2]) && !street_suffix?(street_tokens[-1])
    return true if street_tokens.last&.start_with?("#")
    return true if unit_type.present? && !secondary_unit_designator?(unit_type)

    ordinal_suffixes = ["st", "nd", "rd", "th", "er", "eme"]
    # A unit number ending in an ordinal suffix ("45th") is really part of a street name.
    return true if unit_num&.end_with?(*ordinal_suffixes)
    # A street starting with a bare ordinal suffix directly after the building number
    # means an ordinal such as "5th" was split in two.
    return true if num_street_space.nil? && ordinal_suffixes.include?(street_tokens.first)

    false
  end
end
|
200
|
+
end
|
201
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# typed: strict
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module ValidationTranscriber
|
6
|
+
# Builds the address parser configured for an address's country (and locale).
class AddressParserFactory
  class << self
    extend T::Sig

    # Looks up the country profile for the address and instantiates the
    # parser class named by its validation settings.
    #
    # @param address the address to be parsed; must carry a country_code
    # @param locale optional locale passed through to the profile lookup
    # @return a parser instance for the address
    # @raise [ArgumentError] when country_code is nil, or when the profile is
    #   multi-locale and no locale was given
    sig do
      params(address: AddressValidation::AbstractAddress, locale: T.nilable(String)).returns(AddressParserBase)
    end
    def create(address:, locale: nil)
      country_code = address.country_code
      raise ArgumentError, "country_code cannot be nil" if country_code.nil?

      profile = CountryProfile.for(country_code, locale)

      locale_missing = locale.nil? && profile.validation.multi_locale?
      raise ArgumentError, "#{country_code} is a multi-locale country and requires a locale" if locale_missing

      parser_class = profile.validation.address_parser
      parser_class.new(address: address)
    end
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# typed: strict
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module ValidationTranscriber
|
6
|
+
# Address line parser using North American layouts
# (building number first, optional unit before or after the street).
class AddressParserNorthAmerica < AddressParserBase
  private

  # Memoized list of formats: every layout from #north_american_variants, built
  # once for "number<whitespace>street" and once for "number immediately
  # followed by a non-numeric street".
  sig { returns(T::Array[Regexp]) }
  def country_regex_formats
    @country_regex_formats ||= [
      # The backslash is doubled because this is a double-quoted string: a bare "\s"
      # here is Ruby's escape for a literal space, not the regex whitespace class.
      "#{BUILDING_NUM}(?<num_street_space>\\s+)#{STREET}",
      "#{NUMERIC_ONLY_BUILDING_NUM}#{NON_NUMERIC_STREET}",
    ].flat_map do |building_and_street_expr|
      north_american_variants(building_and_street_expr)
    end.uniq
  end

  # {building_num} {street}
  # {unit_num}-{building_num} {street}
  # {building_num} {street} {unit_type} {unit_num}
  # {building_num} {street} #{unit_num}
  # {building_num} {street} {unit_num}
  # {building_num} {street} - {unit_num}
  #
  # @param building_and_street_expr regex source matching the building number and street
  # @return the layouts above, plus building-name-first and unit-type-first layouts,
  #   each wrapped around the given expression
  sig { params(building_and_street_expr: String).returns(T::Array[Regexp]) }
  def north_american_variants(building_and_street_expr)
    [
      /^#{building_and_street_expr}$/,
      /^(#{UNIT_NUM_NO_HYPHEN}-)?#{building_and_street_expr}$/,
      /^#{building_and_street_expr}\s+#{UNIT_TYPE}\s+#{UNIT_NUM}/,
      /^#{building_and_street_expr}\s+\#\s*#{UNIT_NUM}/,
      /^#{building_and_street_expr}\s+-\s+#{UNIT_NUM}/,
      /^#{BUILDING_NAME}\s#{building_and_street_expr}$/,
      /^#{UNIT_TYPE}\s+#{UNIT_NUM_NO_HYPHEN}\s+#{building_and_street_expr}$/,
    ]
  end
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# typed: strict
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module ValidationTranscriber
    # Address-line parser for lines written as "[unit/]building street".
    class AddressParserOceanic < AddressParserBase
      private

      # Memoized one-element list with the only supported layout: an optional alphanumeric
      # unit number followed by "/", a building number that starts with a digit, whitespace,
      # and then the street.
      sig { returns(T::Array[Regexp]) }
      def country_regex_formats
        @country_regex_formats ||= begin
          unit_building_street =
            %r{^((?<unit_num>[[:alpha:]0-9]+)/)?(?<building_num>[0-9][[:alpha:]0-9]*)\s+(?<street>.+)$}

          [unit_building_street]
        end
      end
    end
  end
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
# typed: strict
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module ValidationTranscriber
    # Builds the candidate address-line strings derived from a single address.
    #
    # Candidates are the raw address1 and address2 lines, the two lines joined, address1 with
    # other known components of the same address removed (with and without the zip), and, for
    # US addresses only, address1 cut off after its street suffix.
    class AddressParserPreprocessor
      include Formatter
      include AddressParsingHelper
      extend T::Sig

      # Resolves the address's country through Worldwide.region.
      #
      # Raises ArgumentError when the address has a blank country code.
      sig do
        params(
          address: AddressValidation::TAddress,
        ).void
      end
      def initialize(address:)
        raise ArgumentError, "country_code cannot be blank in address" if address.country_code.blank?

        @country = T.let(Worldwide.region(code: address.country_code), Worldwide::Region)
        @address = address
        @combinations = T.let(Set.new, T::Set[T.nilable(String)])
      end

      # Returns the distinct, non-blank candidate strings for this address.
      sig { returns(T::Array[String]) }
      def generate_combinations
        @combinations << @address.address1
        @combinations << @address.address2
        @combinations << combined_address_lines
        @combinations << address_1_stripped_of_known_components_excluding_zip
        @combinations << address_1_stripped_of_known_components
        @combinations << address_1_sliced_on_street

        @combinations.compact_blank.uniq
      end

      private

      # address1 and address2 joined by a single space, skipping whichever is blank.
      sig { returns(String) }
      def combined_address_lines
        [@address.address1, @address.address2].compact_blank.join(" ")
      end

      # address1 with address2, city, province words and country words stripped out via
      # Formatter#strip_word. Returns nil when address1 is blank.
      sig { returns(T.nilable(String)) }
      def address_1_stripped_of_known_components_excluding_zip
        # No nil check on @address: #initialize already dereferences the address, so it can
        # never be nil by the time this runs.
        return if @address.address1.blank?

        @address_1_stripped_of_known_components_excluding_zip ||= T.let(
          begin
            components_to_strip = [
              @address.address2,
              @address.city,
              possible_province_words,
              possible_country_words,
            ]

            # Strip the components one after another, each pass working on the previous result.
            components_to_strip.flatten.compact_blank.reduce(T.must(@address.address1)) do |line, component|
              strip_word(line, component)
            end
          end,
          T.nilable(String),
        )
      end

      # The zip-preserving variant above with the zip stripped as well. Returns nil when
      # that variant is blank or the address has no usable zip.
      sig { returns(T.nilable(String)) }
      def address_1_stripped_of_known_components
        modified_address1 = address_1_stripped_of_known_components_excluding_zip
        possible_zip = possible_zip_word
        return if modified_address1.blank? || possible_zip.blank?

        strip_word(modified_address1, possible_zip)
      end

      # For US addresses only: address1 truncated after its last street-suffix token, keeping
      # one directional token when it immediately follows the suffix. Returns nil for other
      # countries, a blank address1, or when no token is a street suffix.
      sig { returns(T.nilable(String)) }
      def address_1_sliced_on_street
        return unless @country.legacy_code == "US" && @address.address1.present?

        address_line_tokens = T.must(@address.address1).split(" ")
        street_suffix_index = address_line_tokens.rindex { |token| street_suffix?(token) }
        return unless street_suffix_index

        # The index past the end yields nil, which directional? treats as "not a directional".
        index_to_slice = if directional?(address_line_tokens[street_suffix_index + 1])
          street_suffix_index + 1
        else
          street_suffix_index
        end

        slice_at_index(address_line_tokens, index_to_slice)
      end

      # The province code plus the zone's alternate codes, alternate names and full name.
      # Empty when the address has no province code or the zone is not a province.
      sig { returns(T::Array[String]) }
      def possible_province_words
        province_code = @address.province_code
        return [] if province_code.blank?

        zone = @country.zone(code: province_code)
        return [] unless zone.province?

        [
          province_code,
          zone.code_alternates,
          zone.name_alternates,
          zone.full_name,
        ].flatten.compact
      end

      # The country's legacy code, full name and alternate names.
      sig { returns(T::Array[String]) }
      def possible_country_words
        [@country.legacy_code.to_s, @country.full_name, @country.name_alternates].flatten.compact
      end

      # The address's zip, but only when it is present and the country accepts it as valid.
      sig { returns(T.nilable(String)) }
      def possible_zip_word
        return if @address.zip.blank?

        @address.zip if @country.valid_zip?(@address.zip)
      end

      # Joins tokens[0..index] (inclusive) with single spaces.
      sig { params(tokens: T::Array[String], index: Integer).returns(String) }
      def slice_at_index(tokens, index)
        T.must(tokens[..index]).join(" ")
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module ValidationTranscriber
    # Token-classification helpers mixed into the address parsing classes.
    module AddressParsingHelper
      extend T::Sig

      # Memoized handle on the shared Constants instance.
      sig { returns(Constants) }
      def address_constants
        @address_constants ||= T.let(Constants.instance, T.nilable(Constants))
      end

      # True when the token, after French-to-English translation, is a known directional.
      sig { params(token: T.nilable(String)).returns(T::Boolean) }
      def directional?(token)
        known_after_translation?(:directionals, token)
      end

      # True when the token, after French-to-English translation, is a known street suffix.
      sig { params(token: T.nilable(String)).returns(T::Boolean) }
      def street_suffix?(token)
        known_after_translation?(:street_suffixes, token)
      end

      private

      # Downcases the token, swaps it for its translations_fr_en entry when one exists, and
      # asks Constants#known? whether the result belongs to the given constant type.
      # Blank tokens are never known.
      sig { params(constant_type: Symbol, token: T.nilable(String)).returns(T::Boolean) }
      def known_after_translation?(constant_type, token)
        return false if token.blank?

        lowered = token.downcase
        translated = address_constants.translations_fr_en[lowered.to_sym]

        address_constants.known?(constant_type, translated || lowered)
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# typed: strict
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module ValidationTranscriber
    # Holds the parsings that the country's address parser produced for one address and
    # exposes the street and building-number candidates found in them.
    class AddressParsings
      extend T::Sig
      include LogHelper

      ParsedComponents = T.type_alias { T::Hash[Symbol, String] }

      sig { returns(T::Array[ParsedComponents]) }
      attr_reader :parsings

      # Parses the address immediately; an address with a blank country code yields no parsings.
      sig { params(address_input: AddressValidation::AbstractAddress, locale: T.nilable(String)).void }
      def initialize(address_input:, locale: nil)
        @parsings = T.let(parse_address_lines(address_input, locale), T::Array[ParsedComponents])
      end

      # True when at least one parsing carries a :po_box component.
      sig { returns(T::Boolean) }
      def describes_po_box?
        parsings.any? { |components| components[:po_box] }
      end

      # Distinct :street values across all parsings, plus "po box" when a PO box was found.
      sig { returns(T::Array[String]) }
      def potential_streets
        streets = parsings.map { |components| components[:street] }.compact
        streets.push("po box") if describes_po_box?
        streets.uniq
      end

      # Distinct :building_num values across all parsings.
      sig { returns(T::Array[String]) }
      def potential_building_numbers
        building_numbers = parsings.map { |components| components[:building_num] }.compact
        building_numbers.uniq
      end

      # Logs the address (minus its phone number) at info level.
      sig { params(address_input: AddressValidation::AbstractAddress).void }
      def log_unparsable_address(address_input)
        log_info("[AddressValidation] Unable to parse address lines", address_input.to_h.except(:phone))
      end

      private

      # Runs the parser selected by AddressParserFactory and logs the address when nothing
      # could be parsed. Skips parsing entirely when the country code is blank.
      sig do
        params(address_input: AddressValidation::AbstractAddress, locale: T.nilable(String))
          .returns(T::Array[ParsedComponents])
      end
      def parse_address_lines(address_input, locale)
        return [] if address_input.country_code.blank?

        parsed = AddressParserFactory.create(address: address_input, locale: locale).parse
        log_unparsable_address(address_input) if parsed.empty?
        parsed
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "singleton"
|
5
|
+
|
6
|
+
module AtlasEngine
  module ValidationTranscriber
    # Process-wide lookup tables loaded from db/data/transcriber.yml.
    #
    # Each top-level key of the YAML file becomes a reader method on the instance.
    class Constants
      extend T::Sig
      include Singleton

      TRANSCRIBER_FILE = File.join(AtlasEngine::Engine.root, "db/data/transcriber.yml").freeze

      # Memoized shared instance.
      #
      # NOTE(review): this replaces the `instance` accessor that `include Singleton` provides
      # with a plain `||=` memoization - confirm the override is intentional.
      def self.instance
        @instance ||= new
      end

      # Defines one instance reader per key of +yaml_hash+; each reader returns the matching
      # entry of @data.
      def self.create_accessor_methods(yaml_hash)
        yaml_hash.each_key do |constant_name|
          define_method(constant_name.to_s) { @data[constant_name] }
        end
      end

      # Loads the YAML data and generates the reader methods for it.
      sig { void }
      def initialize
        @data = load_yaml_file(TRANSCRIBER_FILE)
        self.class.create_accessor_methods(@data)
      end

      # True when +value+ (ignoring case and one trailing ".") appears among either the keys
      # or the values of the +constant_type+ table. Blank values and missing or empty tables
      # are never known.
      sig { params(constant_type: Symbol, value: T.nilable(String)).returns(T::Boolean) }
      def known?(constant_type, value)
        entries = @data[constant_type]
        return false if entries.blank? || value.blank?

        normalized = value.delete_suffix(".").downcase
        return true if entries.key?(normalized.to_sym)

        entries.value?(normalized)
      end

      private

      # Reads the YAML file and converts every key to a symbol.
      def load_yaml_file(filename)
        YAML.load_file(filename, freeze: true).deep_symbolize_keys
      end
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# typed: false
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module ValidationTranscriber
    # Splits an English-style street string into its directional, name and suffix parts.
    class EnglishStreetParser
      include AddressParsingHelper

      # Parses +street+, expected as [pre_directional, name, suffix, post_directional].
      # Either directional may be absent, one word ("East"), or two words ("North East").
      #
      # Returns a hash with any of :pre_directional, :name, :suffix and :post_directional;
      # blank parts are omitted, and a blank street yields an empty hash.
      #
      # NOTE(review): a leading directional word is always consumed as the pre-directional,
      # so a street such as "North St" would come back without a :name (assuming "north" is a
      # known directional) - confirm this is intended.
      def parse(street:)
        return {} if street.blank?

        tokens = street.split(" ")

        # Peel directionals off the front, then off the back, of what remains.
        pre_directional = tokens.shift(directional_run_length(tokens[0], tokens[1])).join(" ")
        post_directional = tokens.pop(directional_run_length(tokens[-1], tokens[-2])).join(" ")

        # The suffix is looked for only after the trailing directional has been removed.
        suffix = tokens.pop if street_suffix?(tokens.last)

        {
          pre_directional: pre_directional,
          name: tokens.join(" "),
          suffix: suffix,
          post_directional: post_directional,
        }.compact_blank.to_h
      end

      private

      # How many directional tokens (0, 1 or 2) sit at one end of the token list, given the
      # outermost token and the one next to it. Either may be nil.
      def directional_run_length(outer_token, inner_token)
        return 0 unless directional?(outer_token)

        directional?(inner_token) ? 2 : 1
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# typed: strict
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module ValidationTranscriber
    # String clean-up and address-building helpers for the transcriber classes.
    module Formatter
      extend T::Sig

      # Removes a trailing run of whitespace, commas and hyphens. Returns nil for blank input.
      sig { params(text: T.nilable(String)).returns(T.nilable(String)) }
      def strip_trailing_punctuation(text)
        return if text.blank?

        text.sub(/[\s,\-]+$/, "")
      end

      # Removes +needle+ (case-insensitively) from +haystack+ in two passes, then trims
      # trailing punctuation. Each pass replaces at most one occurrence, and an occurrence
      # must be preceded by whitespace (or, in the second pass, a comma) to match.
      sig { params(haystack: String, needle: String).returns(String) }
      def strip_word(haystack, needle)
        # Pass 1: needle delimited by whitespace collapses to a single space.
        space_delimited_removed = haystack.sub(/[\s](#{Regexp.escape(needle)})([\s]|$)/i, " ").strip
        # Pass 2: needle delimited by whitespace or commas is removed together with both delimiters.
        comma_delimited_removed = space_delimited_removed.sub(/[\s,](#{Regexp.escape(needle)})([\s,]|$)/i, "").strip

        strip_trailing_punctuation(comma_delimited_removed) || ""
      end

      # Builds an AddressValidation::Address; every field defaults to an empty string.
      sig do
        params(
          address1: String,
          address2: String,
          city: String,
          province_code: String,
          zip: String,
          country_code: String,
          phone: String,
        ).returns(AddressValidation::Address)
      end
      def build_address(address1: "", address2: "", city: "", province_code: "", zip: "", country_code: "", phone: "")
        attributes = {
          address1: address1,
          address2: address2,
          city: city,
          province_code: province_code,
          zip: zip,
          country_code: country_code,
          phone: phone,
        }

        AddressValidation::Address.new(**attributes)
      end
    end
  end
end
|