atlas_engine 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +123 -0
- data/Rakefile +20 -0
- data/app/assets/config/atlas_engine_manifest.js +3 -0
- data/app/assets/stylesheets/atlas_engine/application.css +15 -0
- data/app/concerns/atlas_engine/handles_blob.rb +26 -0
- data/app/concerns/atlas_engine/handles_interruption.rb +22 -0
- data/app/controllers/atlas_engine/application_controller.rb +7 -0
- data/app/controllers/atlas_engine/connectivity_controller.rb +21 -0
- data/app/controllers/atlas_engine/country_imports_controller.rb +73 -0
- data/app/controllers/atlas_engine/graphql_controller.rb +59 -0
- data/app/countries/atlas_engine/ar/country_profile.yml +9 -0
- data/app/countries/atlas_engine/at/address_importer/corrections/open_address/city_corrector.rb +23 -0
- data/app/countries/atlas_engine/at/country_profile.yml +24 -0
- data/app/countries/atlas_engine/at/index_configuration.yml +63 -0
- data/app/countries/atlas_engine/at/synonyms.yml +6 -0
- data/app/countries/atlas_engine/at/validation_transcriber/address_parser.rb +58 -0
- data/app/countries/atlas_engine/au/address_importer/open_address/filter.rb +26 -0
- data/app/countries/atlas_engine/au/address_importer/open_address/mapper.rb +41 -0
- data/app/countries/atlas_engine/au/country_profile.yml +13 -0
- data/app/countries/atlas_engine/au/synonyms.yml +209 -0
- data/app/countries/atlas_engine/au/validation_transcriber/address_parser.rb +121 -0
- data/app/countries/atlas_engine/be/country_profile.yml +12 -0
- data/app/countries/atlas_engine/bm/address_importer/corrections/open_address/city_alias_corrector.rb +38 -0
- data/app/countries/atlas_engine/bm/address_importer/open_address/mapper.rb +40 -0
- data/app/countries/atlas_engine/bm/country_profile.yml +12 -0
- data/app/countries/atlas_engine/br/country_profile.yml +4 -0
- data/app/countries/atlas_engine/ca/country_profile.yml +7 -0
- data/app/countries/atlas_engine/ca/synonyms.yml +1615 -0
- data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/city_corrector.rb +29 -0
- data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/locale_corrector.rb +74 -0
- data/app/countries/atlas_engine/ch/address_importer/open_address/mapper.rb +40 -0
- data/app/countries/atlas_engine/ch/country_profile.yml +15 -0
- data/app/countries/atlas_engine/ch/locales/de/country_profile.yml +15 -0
- data/app/countries/atlas_engine/ch/locales/de/index_configuration.yml +63 -0
- data/app/countries/atlas_engine/ch/locales/de/synonyms.yml +7 -0
- data/app/countries/atlas_engine/ch/locales/fr/synonyms.yml +21 -0
- data/app/countries/atlas_engine/cz/country_profile.yml +6 -0
- data/app/countries/atlas_engine/de/country_profile.yml +19 -0
- data/app/countries/atlas_engine/de/index_configuration.yml +64 -0
- data/app/countries/atlas_engine/de/synonyms.yml +2 -0
- data/app/countries/atlas_engine/de/validation_transcriber/address_parser.rb +19 -0
- data/app/countries/atlas_engine/dk/country_profile.yml +6 -0
- data/app/countries/atlas_engine/dk/synonyms.yml +3 -0
- data/app/countries/atlas_engine/dk/validation_transcriber/address_parser.rb +21 -0
- data/app/countries/atlas_engine/fo/country_profile.yml +5 -0
- data/app/countries/atlas_engine/fr/address_importer/corrections/open_address/city_corrector.rb +28 -0
- data/app/countries/atlas_engine/fr/country_profile.yml +13 -0
- data/app/countries/atlas_engine/fr/synonyms.yml +21 -0
- data/app/countries/atlas_engine/fr/validation_transcriber/address_parser.rb +34 -0
- data/app/countries/atlas_engine/gb/address_validation/es/query_builder.rb +98 -0
- data/app/countries/atlas_engine/gb/country_profile.yml +10 -0
- data/app/countries/atlas_engine/gb/validation_transcriber/full_address_parser.rb +164 -0
- data/app/countries/atlas_engine/gb/validation_transcriber/parsed_address.rb +120 -0
- data/app/countries/atlas_engine/gg/address_validation/validators/full_address/restrictions/unsupported_city.rb +39 -0
- data/app/countries/atlas_engine/gg/country_profile.yml +7 -0
- data/app/countries/atlas_engine/ie/country_profile.yml +3 -0
- data/app/countries/atlas_engine/it/address_importer/corrections/open_address/city_corrector.rb +27 -0
- data/app/countries/atlas_engine/it/address_importer/corrections/open_address/province_corrector.rb +29 -0
- data/app/countries/atlas_engine/it/address_importer/open_address/mapper.rb +42 -0
- data/app/countries/atlas_engine/it/country_profile.yml +11 -0
- data/app/countries/atlas_engine/jp/address_validation/es/data_mapper.rb +63 -0
- data/app/countries/atlas_engine/jp/country_profile.yml +6 -0
- data/app/countries/atlas_engine/kr/address_importer/open_address/mapper.rb +41 -0
- data/app/countries/atlas_engine/kr/country_profile.yml +11 -0
- data/app/countries/atlas_engine/li/address_importer/corrections/open_address/city_corrector.rb +25 -0
- data/app/countries/atlas_engine/li/country_profile.yml +21 -0
- data/app/countries/atlas_engine/li/index_configuration.yml +63 -0
- data/app/countries/atlas_engine/li/synonyms.yml +6 -0
- data/app/countries/atlas_engine/lt/country_profile.yml +6 -0
- data/app/countries/atlas_engine/lt/synonyms.yml +7 -0
- data/app/countries/atlas_engine/lt/validation_transcriber/address_parser.rb +24 -0
- data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/locale_corrector.rb +54 -0
- data/app/countries/atlas_engine/lu/country_profile.yml +12 -0
- data/app/countries/atlas_engine/nl/address_importer/corrections/open_address/city_corrector.rb +25 -0
- data/app/countries/atlas_engine/nl/country_profile.yml +18 -0
- data/app/countries/atlas_engine/nl/index_configuration.yml +52 -0
- data/app/countries/atlas_engine/nl/synonyms.yml +92 -0
- data/app/countries/atlas_engine/nl/validation_transcriber/address_parser.rb +85 -0
- data/app/countries/atlas_engine/no/country_profile.yml +5 -0
- data/app/countries/atlas_engine/nz/country_profile.yml +3 -0
- data/app/countries/atlas_engine/pl/country_profile.yml +5 -0
- data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +19 -0
- data/app/countries/atlas_engine/pt/address_importer/corrections/open_address/city_corrector.rb +32 -0
- data/app/countries/atlas_engine/pt/address_importer/open_address/mapper.rb +39 -0
- data/app/countries/atlas_engine/pt/country_profile.yml +10 -0
- data/app/countries/atlas_engine/pt/synonyms.yml +7 -0
- data/app/countries/atlas_engine/sa/country_profile.yml +10 -0
- data/app/countries/atlas_engine/se/country_profile.yml +5 -0
- data/app/countries/atlas_engine/tt/address_importer/open_address/mapper.rb +38 -0
- data/app/countries/atlas_engine/tt/country_profile.yml +7 -0
- data/app/countries/atlas_engine/us/country_profile.yml +12 -0
- data/app/countries/atlas_engine/us/synonyms.yml +350 -0
- data/app/graphql/atlas_engine/errors/locale_unsupported_error.rb +17 -0
- data/app/graphql/atlas_engine/schema.graphql +1293 -0
- data/app/graphql/atlas_engine/schema.rb +23 -0
- data/app/graphql/atlas_engine/types/address_validation/address_input.rb +51 -0
- data/app/graphql/atlas_engine/types/address_validation/concern_type.rb +20 -0
- data/app/graphql/atlas_engine/types/address_validation/enums/concern_enum.rb +15 -0
- data/app/graphql/atlas_engine/types/address_validation/field_type.rb +15 -0
- data/app/graphql/atlas_engine/types/address_validation/suggestion_type.rb +21 -0
- data/app/graphql/atlas_engine/types/base_argument.rb +9 -0
- data/app/graphql/atlas_engine/types/base_enum.rb +9 -0
- data/app/graphql/atlas_engine/types/base_field.rb +10 -0
- data/app/graphql/atlas_engine/types/base_input_object.rb +9 -0
- data/app/graphql/atlas_engine/types/base_interface.rb +10 -0
- data/app/graphql/atlas_engine/types/base_object.rb +9 -0
- data/app/graphql/atlas_engine/types/base_scalar.rb +9 -0
- data/app/graphql/atlas_engine/types/base_union.rb +9 -0
- data/app/graphql/atlas_engine/types/matching_strategy_type.rb +12 -0
- data/app/graphql/atlas_engine/types/mutation_type.rb +9 -0
- data/app/graphql/atlas_engine/types/query_type.rb +61 -0
- data/app/graphql/atlas_engine/types/validation_supported_country.rb +12 -0
- data/app/graphql/atlas_engine/types/validation_type.rb +22 -0
- data/app/helpers/atlas_engine/address_importer/import_log_helper.rb +66 -0
- data/app/helpers/atlas_engine/application_helper.rb +7 -0
- data/app/helpers/atlas_engine/locale_format_helper.rb +40 -0
- data/app/helpers/atlas_engine/log_base.rb +32 -0
- data/app/helpers/atlas_engine/log_helper.rb +24 -0
- data/app/helpers/atlas_engine/metrics_helper.rb +25 -0
- data/app/jobs/atlas_engine/address_importer/clear_records_job.rb +39 -0
- data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_job.rb +212 -0
- data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_launcher_job.rb +67 -0
- data/app/jobs/atlas_engine/address_importer/open_address/prepares_geo_json_file.rb +41 -0
- data/app/jobs/atlas_engine/address_importer/resumable_import_job.rb +49 -0
- data/app/jobs/atlas_engine/address_importer/street_backfill_job.rb +63 -0
- data/app/jobs/atlas_engine/application_job.rb +10 -0
- data/app/jobs/atlas_engine/concerns/address_importer/handles_errors.rb +43 -0
- data/app/lib/atlas_engine/concern_formatter.rb +40 -0
- data/app/lib/atlas_engine/restrictions/base.rb +20 -0
- data/app/lib/atlas_engine/restrictions/unsupported_script.rb +31 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +201 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_factory.rb +27 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_north_america.rb +39 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_oceanic.rb +17 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parser_preprocessor.rb +132 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parsing_helper.rb +38 -0
- data/app/lib/atlas_engine/validation_transcriber/address_parsings.rb +54 -0
- data/app/lib/atlas_engine/validation_transcriber/constants.rb +50 -0
- data/app/lib/atlas_engine/validation_transcriber/english_street_parser.rb +59 -0
- data/app/lib/atlas_engine/validation_transcriber/formatter.rb +46 -0
- data/app/lib/atlas_engine/validation_transcriber/french_street_parser.rb +50 -0
- data/app/lib/atlas_engine/validation_transcriber/province_code_normalizer.rb +45 -0
- data/app/lib/atlas_engine/validation_transcriber/street_parser.rb +18 -0
- data/app/lib/atlas_engine/validation_transcriber/zip_normalizer.rb +23 -0
- data/app/mailers/atlas_engine/application_mailer.rb +9 -0
- data/app/models/atlas_engine/address_importer/corrections/corrector.rb +33 -0
- data/app/models/atlas_engine/address_importer/import_events_notifier/base.rb +35 -0
- data/app/models/atlas_engine/address_importer/import_events_notifier/notifier.rb +26 -0
- data/app/models/atlas_engine/address_importer/open_address/default_mapper.rb +46 -0
- data/app/models/atlas_engine/address_importer/open_address/feature_helper.rb +110 -0
- data/app/models/atlas_engine/address_importer/open_address/filter.rb +17 -0
- data/app/models/atlas_engine/address_importer/open_address/loader.rb +27 -0
- data/app/models/atlas_engine/address_importer/open_address/transformer.rb +39 -0
- data/app/models/atlas_engine/address_importer/open_address.rb +10 -0
- data/app/models/atlas_engine/address_importer/validation/base_validator.rb +86 -0
- data/app/models/atlas_engine/address_importer/validation/default_validator.rb +27 -0
- data/app/models/atlas_engine/address_importer/validation/field_validations/city.rb +47 -0
- data/app/models/atlas_engine/address_importer/validation/field_validations/interface.rb +29 -0
- data/app/models/atlas_engine/address_importer/validation/field_validations/province.rb +73 -0
- data/app/models/atlas_engine/address_importer/validation/field_validations/zip.rb +84 -0
- data/app/models/atlas_engine/address_importer/validation/validator.rb +17 -0
- data/app/models/atlas_engine/address_importer/validation/wrapper.rb +70 -0
- data/app/models/atlas_engine/address_number.rb +36 -0
- data/app/models/atlas_engine/address_number_range.rb +200 -0
- data/app/models/atlas_engine/address_validation/abstract_address.rb +49 -0
- data/app/models/atlas_engine/address_validation/address.rb +47 -0
- data/app/models/atlas_engine/address_validation/candidate.rb +109 -0
- data/app/models/atlas_engine/address_validation/candidate_tuple.rb +15 -0
- data/app/models/atlas_engine/address_validation/concern.rb +74 -0
- data/app/models/atlas_engine/address_validation/concern_producer.rb +19 -0
- data/app/models/atlas_engine/address_validation/concern_queue.rb +20 -0
- data/app/models/atlas_engine/address_validation/concern_record.rb +122 -0
- data/app/models/atlas_engine/address_validation/datastore_base.rb +27 -0
- data/app/models/atlas_engine/address_validation/errors.rb +13 -0
- data/app/models/atlas_engine/address_validation/es/candidate_selector.rb +70 -0
- data/app/models/atlas_engine/address_validation/es/data_mappers/decompounding_data_mapper.rb +39 -0
- data/app/models/atlas_engine/address_validation/es/data_mappers/default_data_mapper.rb +110 -0
- data/app/models/atlas_engine/address_validation/es/datastore.rb +229 -0
- data/app/models/atlas_engine/address_validation/es/default_query_builder.rb +30 -0
- data/app/models/atlas_engine/address_validation/es/query_builder.rb +160 -0
- data/app/models/atlas_engine/address_validation/es/term_vectors.rb +78 -0
- data/app/models/atlas_engine/address_validation/es/validators/full_address.rb +123 -0
- data/app/models/atlas_engine/address_validation/es/validators/full_address_street.rb +18 -0
- data/app/models/atlas_engine/address_validation/es/validators/restriction_evaluator.rb +37 -0
- data/app/models/atlas_engine/address_validation/field.rb +30 -0
- data/app/models/atlas_engine/address_validation/full_address_validator_base.rb +27 -0
- data/app/models/atlas_engine/address_validation/log_emitter.rb +66 -0
- data/app/models/atlas_engine/address_validation/matching_strategies.rb +16 -0
- data/app/models/atlas_engine/address_validation/normalizer.rb +38 -0
- data/app/models/atlas_engine/address_validation/predicate_pipeline.rb +80 -0
- data/app/models/atlas_engine/address_validation/request.rb +12 -0
- data/app/models/atlas_engine/address_validation/result.rb +154 -0
- data/app/models/atlas_engine/address_validation/runs_validation.rb +16 -0
- data/app/models/atlas_engine/address_validation/session.rb +47 -0
- data/app/models/atlas_engine/address_validation/statsd_emitter.rb +72 -0
- data/app/models/atlas_engine/address_validation/strategies.rb +10 -0
- data/app/models/atlas_engine/address_validation/suggestion.rb +97 -0
- data/app/models/atlas_engine/address_validation/token/comparator.rb +44 -0
- data/app/models/atlas_engine/address_validation/token/comparison.rb +76 -0
- data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +158 -0
- data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +166 -0
- data/app/models/atlas_engine/address_validation/token/sequence.rb +147 -0
- data/app/models/atlas_engine/address_validation/token/synonyms.rb +77 -0
- data/app/models/atlas_engine/address_validation/token.rb +113 -0
- data/app/models/atlas_engine/address_validation/validator.rb +147 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +97 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +164 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result_base.rb +46 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +135 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/components_to_validate.rb +88 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +127 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +23 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_concern_builder.rb +42 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_country_concern.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_province_concern.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/no_candidate_result.rb +26 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/number_comparison.rb +31 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/postal_code_matcher.rb +60 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/result_updater.rb +42 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +140 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unknown_address_concern.rb +30 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unknown_province_concern.rb +38 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unknown_zip_for_address_concern.rb +32 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unmatched_field_concern.rb +84 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unsupported_script_result.rb +22 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/cache.rb +38 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/city/present.rb +36 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/country/exists.rb +34 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/country/valid_for_zip.rb +60 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/no_emojis.rb +38 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/no_html_tags.rb +39 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/no_url.rb +38 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_length.rb +34 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_token_count.rb +63 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/phone/valid.rb +41 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/predicate.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/province/exists.rb +43 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/province/valid_for_country.rb +48 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1.rb +45 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +43 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/present.rb +35 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/present.rb +58 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_country.rb +45 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_province.rb +55 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/zip_base.rb +25 -0
- data/app/models/atlas_engine/address_validation/zip_truncator.rb +32 -0
- data/app/models/atlas_engine/application_record.rb +8 -0
- data/app/models/atlas_engine/coded_error.rb +18 -0
- data/app/models/atlas_engine/coded_errors.rb +17 -0
- data/app/models/atlas_engine/country_import.rb +44 -0
- data/app/models/atlas_engine/country_profile.rb +270 -0
- data/app/models/atlas_engine/country_profile_ingestion_subset.rb +42 -0
- data/app/models/atlas_engine/country_profile_subset_base.rb +22 -0
- data/app/models/atlas_engine/country_profile_validation_subset.rb +48 -0
- data/app/models/atlas_engine/country_repository.rb +110 -0
- data/app/models/atlas_engine/elasticsearch/client.rb +116 -0
- data/app/models/atlas_engine/elasticsearch/client_interface.rb +89 -0
- data/app/models/atlas_engine/elasticsearch/repository.rb +246 -0
- data/app/models/atlas_engine/elasticsearch/repository_interface.rb +82 -0
- data/app/models/atlas_engine/elasticsearch/response.rb +20 -0
- data/app/models/atlas_engine/event.rb +12 -0
- data/app/models/atlas_engine/field_decompounder.rb +36 -0
- data/app/models/atlas_engine/index_configuration_factory.rb +188 -0
- data/app/models/atlas_engine/post_address.rb +114 -0
- data/app/models/atlas_engine/post_address_importer.rb +34 -0
- data/app/models/atlas_engine/services/service_helper.rb +21 -0
- data/app/models/atlas_engine/services/validation.rb +65 -0
- data/app/models/atlas_engine/services/validation_eligibility.rb +18 -0
- data/app/models/atlas_engine/street.rb +34 -0
- data/app/tasks/maintenance/atlas_engine/elasticsearch_index_create_task.rb +106 -0
- data/app/tasks/maintenance/atlas_engine/geo_json_import_task.rb +29 -0
- data/app/views/atlas_engine/connectivity/index.html.erb +50 -0
- data/app/views/atlas_engine/country_imports/index.html.erb +49 -0
- data/app/views/atlas_engine/country_imports/show.html.erb +73 -0
- data/app/views/layouts/atlas_engine/application.html.erb +15 -0
- data/config/initializers/1.ruby_patches.rb +18 -0
- data/config/initializers/sorbet.rb +5 -0
- data/config/initializers/worldwide.rb +5 -0
- data/config/locales/internal/en.yml +14 -0
- data/config/routes.rb +17 -0
- data/db/data/address_synonyms/index_configurations/default.yml +141 -0
- data/db/data/country_profiles/default.yml +23 -0
- data/db/data/transcriber.yml +760 -0
- data/db/data/validation_pipelines/es.yml +58 -0
- data/db/data/validation_pipelines/es_street.yml +58 -0
- data/db/data/validation_pipelines/local.yml +60 -0
- data/db/migrate/20230919173037_create_atlas_engine_post_addresses.rb +25 -0
- data/db/migrate/20231117142735_add_building_and_unit_ranges_column.rb +7 -0
- data/db/migrate/20231117143536_create_atlas_engine_country_imports.rb +11 -0
- data/db/migrate/20231117145844_create_atlas_engine_events_table.rb +13 -0
- data/db/migrate/20231123153554_add_unique_index_to_atlas_engine_post_addresses.rb +14 -0
- data/db/migrate/20231123154658_add_index_to_post_addresses_on_source_id_locale_country_code.rb +12 -0
- data/lib/atlas_engine/engine.rb +10 -0
- data/lib/atlas_engine/version.rb +6 -0
- data/lib/atlas_engine.rb +66 -0
- data/lib/tasks/atlas_engine/address_importer.rake +20 -0
- metadata +553 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Lt
    module ValidationTranscriber
      # Address parser for the `Lt` country namespace.
      #
      # Supplies the country-specific regular expressions consumed by
      # AddressParserBase. Two layouts are recognised:
      #   "<street> <building_num>"
      #   "<street> <building_num>-<unit_num>"
      # Both patterns are anchored at the start of a line only and match
      # case-insensitively.
      class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
        # Regex fragments; each one contributes a single named capture.
        # The street capture is lazy, so it stops at the first whitespace that
        # is followed by a building number.
        STREET = "(?<street>.+?)"
        # One or more digits, optionally followed by letters.
        BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)"
        # Same shape as the building number.
        UNIT_NUM = "(?<unit_num>[0-9]+[[:alpha:]]*)"

        private

        # Returns the memoized list of address-line patterns for this country.
        sig { override.returns(T::Array[Regexp]) }
        def country_regex_formats
          return @country_regex_formats if @country_regex_formats

          street_and_building = /^#{STREET}\s#{BUILDING_NUM}/i
          street_building_and_unit = /^#{STREET}\s#{BUILDING_NUM}-#{UNIT_NUM}/i

          @country_regex_formats = [street_and_building, street_building_and_unit]
        end
      end
    end
  end
end
|
data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/locale_corrector.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Lu
    module AddressImporter
      module Corrections
        module OpenAddress
          # Tags an imported address hash with a locale derived from the way
          # its street name begins.
          class LocaleCorrector
            class << self
              extend T::Sig

              # Street-name openers that mark a street as French-language.
              # Matching is case-sensitive and prefix-only.
              FRENCH_STREET_PREFIXES = [
                "Allée", "Avenue", "Boulevard",
                "Centre", "Ceinture", "Chemin", "Cité",
                "Domaine",
                "Impasse",
                "Maison", "Montée",
                "Parc", "Passage", "Place", "Plateau", "Porte",
                "Rond-Point", "Rond Point", "Route", "Rue",
                "Sentier",
                "Zone",
              ]

              # Sets address[:locale] in place: "fr" when address[:street]
              # starts with one of FRENCH_STREET_PREFIXES, otherwise "lb".
              #
              # address - Hash with a String under :street; mutated in place.
              sig { params(address: Hash).void }
              def apply(address)
                french_street = address[:street].start_with?(*FRENCH_STREET_PREFIXES)

                address[:locale] = french_street ? "fr" : "lb"
              end
            end
          end
        end
      end
    end
  end
end
|
data/app/countries/atlas_engine/nl/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Nl
    module AddressImporter
      module Corrections
        module OpenAddress
          # Adds an alternate city name to imported address hashes whose city
          # list is exactly ["'s-Gravenhage"].
          class CityCorrector
            class << self
              extend T::Sig

              # Appends "Den Haag" to address[:city] when that value equals
              # ["'s-Gravenhage"]; any other value is left untouched.
              #
              # address - Hash whose :city entry is compared against a
              #           one-element Array; the Array is mutated in place.
              sig { params(address: Hash).void }
              def apply(address)
                return unless address[:city] == ["'s-Gravenhage"]

                address[:city] << "Den Haag"
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
id: NL
|
2
|
+
ingestion:
|
3
|
+
correctors:
|
4
|
+
open_address:
|
5
|
+
- AtlasEngine::Nl::AddressImporter::Corrections::OpenAddress::CityCorrector
|
6
|
+
data_mapper: AtlasEngine::AddressValidation::Es::DataMappers::DecompoundingDataMapper
|
7
|
+
validation:
|
8
|
+
enabled: true
|
9
|
+
has_provinces: true
|
10
|
+
default_matching_strategy: es
|
11
|
+
normalized_components:
|
12
|
+
- street_decompounded
|
13
|
+
address_parser: AtlasEngine::Nl::ValidationTranscriber::AddressParser
|
14
|
+
decompounding_patterns:
|
15
|
+
street:
|
16
|
+
- (?<name>\w+)(?<suffix>dwarsstraat|dwstr|dwarsweg|dwwg|dijk|dk|gracht|gr|kade|kd|kanaal|kan)(?:\b)
|
17
|
+
- (?<name>\w+)(?<suffix>laan|leane|loane|ln|park|pk|plantsoen|plnts|plein|pln|singel|sngl)(?:\b)
|
18
|
+
- (?<name>\w+)(?<suffix>straat|strjitte|str|straatweg|strwg|weg|wg)(?:\b)
|
@@ -0,0 +1,52 @@
|
|
1
|
+
mappings:
|
2
|
+
properties:
|
3
|
+
street:
|
4
|
+
analyzer: street_indexing_analyzer
|
5
|
+
search_analyzer: street_decompounding_analyzer
|
6
|
+
street_stripped:
|
7
|
+
analyzer: street_indexing_analyzer
|
8
|
+
search_analyzer: street_decompounding_analyzer
|
9
|
+
street_decompounded:
|
10
|
+
type: text
|
11
|
+
analyzer: text_analyzer
|
12
|
+
fields:
|
13
|
+
keyword:
|
14
|
+
type: keyword
|
15
|
+
settings:
|
16
|
+
index:
|
17
|
+
analysis:
|
18
|
+
analyzer:
|
19
|
+
text_analyzer:
|
20
|
+
filter:
|
21
|
+
- lowercase
|
22
|
+
- icu_folding
|
23
|
+
- strip_special_characters
|
24
|
+
street_indexing_analyzer:
|
25
|
+
tokenizer: standard
|
26
|
+
filter:
|
27
|
+
- lowercase
|
28
|
+
- icu_folding
|
29
|
+
- strip_special_characters
|
30
|
+
- street_suffix_decompounder
|
31
|
+
street_decompounding_analyzer:
|
32
|
+
tokenizer: standard
|
33
|
+
filter:
|
34
|
+
- lowercase
|
35
|
+
- icu_folding
|
36
|
+
- strip_special_characters
|
37
|
+
- street_suffix_decompounder
|
38
|
+
- street_synonyms
|
39
|
+
street_analyzer:
|
40
|
+
filter:
|
41
|
+
- lowercase
|
42
|
+
- icu_folding
|
43
|
+
- strip_special_characters
|
44
|
+
- street_synonyms
|
45
|
+
filter:
|
46
|
+
street_suffix_decompounder:
|
47
|
+
type: pattern_capture
|
48
|
+
preserve_original: "false"
|
49
|
+
patterns:
|
50
|
+
<% country_profile.decompounding_patterns(:street).each do |pattern| %>
|
51
|
+
- <%= pattern %>
|
52
|
+
<% end %>
|
@@ -0,0 +1,92 @@
|
|
1
|
+
street_synonyms:
|
2
|
+
# dutch
|
3
|
+
## directionals
|
4
|
+
- noord, n
|
5
|
+
- oost, o
|
6
|
+
- west, w
|
7
|
+
- zuid, z
|
8
|
+
## street suffixes
|
9
|
+
- boulevard, bd, bld, boulev
|
10
|
+
- dijk, dk
|
11
|
+
- dwarsstraat, dwstr
|
12
|
+
- dwarsweg, dwwg
|
13
|
+
- gracht, gr
|
14
|
+
- kade, kd
|
15
|
+
- kanaal, kan
|
16
|
+
- laan, leane, loane, ln
|
17
|
+
- park, pk
|
18
|
+
- plantsoen, plnts
|
19
|
+
- plein, pln, ln
|
20
|
+
- singel, sngl
|
21
|
+
- square, sq
|
22
|
+
- steenweg, stwg
|
23
|
+
- straat, str
|
24
|
+
- straatweg, strwg
|
25
|
+
- van, v
|
26
|
+
- weg, wg
|
27
|
+
## titles
|
28
|
+
- aalmoezenier, aalm # Chaplain
|
29
|
+
- admiraal, adm # Admiral
|
30
|
+
- baron, bar # Baron
|
31
|
+
- baronesse, bsse # Baroness
|
32
|
+
- bisschop, biss # Bishop
|
33
|
+
- burgemeester, burg # Mayor
|
34
|
+
- commissaris, comm # Commissioner
|
35
|
+
- deken, dkn # Deacon
|
36
|
+
- dokter, doctor, dr # Doctor
|
37
|
+
- dominee, ds # Minister, Vicar, Padre
|
38
|
+
- douairière, dre # Dowager
|
39
|
+
- gebroeders, gebr # Brothers
|
40
|
+
- generaal, gen # General
|
41
|
+
- graaf, gr # Count
|
42
|
+
- gravin, gr # Countess
|
43
|
+
- hertog, htg # Duke
|
44
|
+
- ingenieur, ir # Engineer
|
45
|
+
- jonkheer, jhr # Esquire
|
46
|
+
- kanunnik, kan # Canon
|
47
|
+
- kapelaan, kap # Curate, Chaplain
|
48
|
+
- kapitein, kapt # Captain
|
49
|
+
- kardinaal, kard # Cardinal
|
50
|
+
- keizer, kzr # Emperor
|
51
|
+
- kolonel, kol # Colonel
|
52
|
+
- koning, kon # King
|
53
|
+
- koningin, kon # Queen
|
54
|
+
- luitenant generaal, lt gen # Lieutenant-General
|
55
|
+
- majoor, maj # Major
|
56
|
+
- meester, mr # Mister (academic grade)
|
57
|
+
- minister, min # Minister (political)
|
58
|
+
- monseigneur, mgr # Monseigneur
|
59
|
+
- notaris, not # Notary
|
60
|
+
- pastoor, past # Pastor
|
61
|
+
- pater, ptr # Father (religious)
|
62
|
+
- paters, ptrs # Fathers (religious)
|
63
|
+
- president, pres # President
|
64
|
+
- prins, pr # Prince
|
65
|
+
- princes, pr # Princess
|
66
|
+
- professor, prof # Professor
|
67
|
+
- rector, rect # Rector
|
68
|
+
- ridder, rdr # Knight
|
69
|
+
- secretaris, secr # Secretary
|
70
|
+
- sinjeur, sinj # Cleric
|
71
|
+
- sint, st # Saint
|
72
|
+
- wethouder, weth # Alderman
|
73
|
+
- zuster, zr # Sister (religious)
|
74
|
+
- zusters, zrs # Sisters (religious)
|
75
|
+
## common words
|
76
|
+
- aan de, a d, ad
|
77
|
+
- het, 't
|
78
|
+
- van, v
|
79
|
+
# frisian
|
80
|
+
## directionals
|
81
|
+
- east, o
|
82
|
+
- noard, n
|
83
|
+
- sud, s
|
84
|
+
## street suffixes
|
85
|
+
- strjitte, str
|
86
|
+
|
87
|
+
city_synonyms:
|
88
|
+
## common words
|
89
|
+
- aan de, a d, ad
|
90
|
+
- het, 't
|
91
|
+
- sint, st
|
92
|
+
- van, v
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Nl
    module ValidationTranscriber
      # Address parser for the `Nl` country namespace.
      #
      # Provides the country-specific address-line patterns, PO box
      # extraction and sanity checks used by AddressParserBase.
      class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
        private

        # Regex fragments; each one contributes a single named capture.
        STREET = "(?<street>.+)"
        # Street names that themselves end in a number. The backslash is
        # doubled because "\s" inside a double-quoted Ruby string is the
        # escape for a literal space, not the regex whitespace class.
        NUMBERED_STREET = "(?<street>.+\\s+[0-9]+)"
        # Digits optionally followed by letters. The POSIX class must sit
        # inside a bracket expression: a bare `[:alpha:]` is just the character
        # set ":", "a", "l", "p", "h".
        BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)"
        UNIT_NUM = "(?<unit_num>[[:alnum:]]+)"
        # A box-type keyword followed by whitespace and a number.
        PO_BOX = /\b(?<box_type>pb|postbus|antwoordnummer)\s+(?<number>\d+)\b/i
        # since not all street synonyms are street suffixes, we cannot read them from the synonyms file
        # TODO synonyms in the file should be grouped by type
        STREET_SUFFIXES = %r{
          \A(
            dwarsstraat|dwstr|dwarsweg|dwwg|dijk|dk|gracht|gr|kade|kd|kanaal|kan
            |laan|leane|loane|ln|park|pk|plantsoen|plnts|plein|pln|singel|sngl
            |straat|strjitte|str|straatweg|strwg|weg|wg
          )\z
        }ix

        # Returns the memoized list of address-line patterns:
        # "<street>[,] <building_num>", optionally followed by a unit number
        # separated by whitespace, commas or hyphens, each in a plain and a
        # numbered-street variant.
        sig { returns(T::Array[Regexp]) }
        def country_regex_formats
          @country_regex_formats ||= [
            /^#{STREET},?\s+#{BUILDING_NUM}$/,
            /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}$/,
            /^#{STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
            /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
          ]
        end

        # Splits a PO box reference out of an address line.
        #
        # Returns a two-element array: the address line with every PO_BOX match
        # removed (then stripped, with one trailing comma dropped) and the
        # number of the first match. When there is no match the line is
        # returned unchanged alongside nil.
        sig { override.params(address_line: String).returns(T::Array[T.nilable(String)]) }
        def extract_po_box(address_line)
          po_box_match = address_line.match(PO_BOX)
          return [address_line, nil] unless po_box_match

          remainder = address_line.gsub(PO_BOX, "").strip.delete_suffix(",")
          [remainder, po_box_match["number"]]
        end

        # Return true if something's obviously wrong with this regex match:
        # the captured street does not occur (case-insensitively) in address1
        # or address2, or the building/unit capture is really a PO box
        # reference or a street suffix.
        sig do
          override.params(
            captures: T::Hash[Symbol, T.nilable(String)],
            address: AddressValidation::TAddress,
          ).returns(T::Boolean)
        end
        def ridiculous?(captures, address)
          building_num = captures[:building_num]&.downcase
          street = captures[:street]&.downcase
          unit_num = captures[:unit_num]&.downcase

          if street.present?
            needle = street.upcase
            return true unless address.address1&.upcase&.include?(needle) ||
              address.address2&.upcase&.include?(needle)
          end

          [building_num, unit_num].any? do |token|
            po_box?(token) || street_suffix?(token)
          end
        end

        # True when the token contains a PO_BOX match; blank tokens are never
        # PO boxes.
        sig { override.params(token: T.nilable(String)).returns(T::Boolean) }
        def po_box?(token)
          return false if token.blank?

          token.match?(PO_BOX)
        end

        # True when the whole token is one of the known street suffixes.
        sig { override.params(token: T.nilable(String)).returns(T::Boolean) }
        def street_suffix?(token)
          token.present? && token.match?(STREET_SUFFIXES)
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Pl
    module ValidationTranscriber
      # Address parser for Poland. Contributes a single address-line format to the
      # shared AddressParserBase.
      class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
        private

        # Returns the memoized list of regex formats for Polish address lines.
        # The one format captures a street, a building number that starts with a digit
        # and may continue with letters or digits, and an optional unit after a slash.
        sig { returns(T::Array[Regexp]) }
        def country_regex_formats
          return @country_regex_formats if @country_regex_formats

          street_building_unit =
            %r{^(?<street>.+)\s+(?<building_num>[0-9][[:alpha:]0-9]*)(\s*/\s*(?<unit_num>[[:alpha:]0-9]+))?$}

          @country_regex_formats = [street_building_unit]
        end
      end
    end
  end
end
|
data/app/countries/atlas_engine/pt/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Pt
    module AddressImporter
      module Corrections
        module OpenAddress
          # Adds well-known short names to the city list of an imported Portuguese
          # address, so the address is listed under both the full name and the alias.
          class CityCorrector
            # Full city name as found in address[:city] => alias to add next to it.
            # Built once and frozen instead of being re-allocated on every call.
            CITY_ALIASES = {
              "Vila Nova De Gaia" => "Gaia",
              "Portela Lrs" => "Portela",
              "Vila Chã Vcd" => "Vila Chã",
              "Alverca Do Ribatejo" => "Alverca",
            }.freeze

            class << self
              extend T::Sig

              # Mutates address[:city] in place, appending the alias of every known
              # city name it contains. Does nothing when the address has no city.
              # An alias that is already listed is not added again, so applying the
              # corrector more than once leaves the list unchanged.
              #
              # NOTE(review): address[:city] is presumably an Array of city names
              # (the PT mapper builds it that way) - confirm against other callers.
              sig { params(address: Hash).void }
              def apply(address)
                cities = address[:city]
                return if cities.nil?

                CITY_ALIASES.each do |city_name, city_alias|
                  next unless cities.include?(city_name)
                  next if cities.include?(city_alias)

                  cities << city_alias
                end
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Pt
    module AddressImporter
      module OpenAddress
        # Maps an OpenAddress feature for Portugal onto the attribute hash used by the importer.
        class Mapper < AtlasEngine::AddressImporter::OpenAddress::DefaultMapper
          # Builds the address attributes for one feature.
          #
          # City and street are title-cased; the city is wrapped in a single-element
          # array. The province code is derived from the postcode by the
          # province_code_from_zip helper (defined outside this class body).
          # NOTE(review): geometry(feature) appears to return [longitude, latitude],
          # given that latitude is read from index 1 - confirm against the helper.
          sig do
            params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Hash[Symbol, T.untyped])
          end
          def map(feature)
            properties = feature["properties"]
            city, street, number, unit, postcode =
              properties.values_at(*%w[city street number unit postcode])

            {
              source_id: openaddress_source_id(feature),
              locale: @locale,
              country_code: "PT",
              province_code: province_code_from_zip(postcode),
              # Omitted: region1..4
              city: [city.titleize],
              suburb: nil,
              zip: postcode,
              street: street.titleize,
              building_and_unit_ranges: housenumber_and_unit(number, unit),
              latitude: geometry(feature)&.at(1),
              longitude: geometry(feature)&.at(0),
            }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
id: PT
|
2
|
+
ingestion:
|
3
|
+
correctors:
|
4
|
+
open_address:
|
5
|
+
- AtlasEngine::Pt::AddressImporter::Corrections::OpenAddress::CityCorrector
|
6
|
+
open_address:
|
7
|
+
feature_mapper: AtlasEngine::Pt::AddressImporter::OpenAddress::Mapper
|
8
|
+
validation:
|
9
|
+
enabled: true
|
10
|
+
default_matching_strategy: es
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Tt
    module AddressImporter
      module OpenAddress
        # Maps an OpenAddress feature for Trinidad and Tobago onto the attribute hash
        # used by the importer.
        class Mapper < AtlasEngine::AddressImporter::OpenAddress::DefaultMapper
          # Builds the address attributes for one feature.
          #
          # The city is always ["Chaguanas"], and the feature's own "city" property is
          # stored as the suburb. No province code is set.
          # NOTE(review): the fixed city presumably reflects the coverage of the source
          # data set - confirm before importing other regions.
          # NOTE(review): geometry(feature) appears to return [longitude, latitude],
          # given that latitude is read from index 1 - confirm against the helper.
          sig do
            params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Hash[Symbol, T.untyped])
          end
          def map(feature)
            properties = feature["properties"]
            city, street, number, unit, postcode =
              properties.values_at(*%w[city street number unit postcode])

            {
              source_id: openaddress_source_id(feature),
              locale: @locale,
              country_code: "TT",
              province_code: nil,
              city: ["Chaguanas"],
              suburb: city,
              zip: postcode,
              street: street,
              building_and_unit_ranges: housenumber_and_unit(number, unit),
              latitude: geometry(feature)&.at(1),
              longitude: geometry(feature)&.at(0),
            }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
id: US
|
2
|
+
validation:
|
3
|
+
enabled: true
|
4
|
+
default_matching_strategy: es_street
|
5
|
+
city_fields:
|
6
|
+
- city_aliases
|
7
|
+
address_parser: AtlasEngine::ValidationTranscriber::AddressParserNorthAmerica
|
8
|
+
ingestion:
|
9
|
+
settings:
|
10
|
+
number_of_shards: "7"
|
11
|
+
min_zip_edge_ngram: "1"
|
12
|
+
max_zip_edge_ngram: "10"
|