atlas_engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (298)
  1. checksums.yaml +7 -0
  2. data/README.md +123 -0
  3. data/Rakefile +20 -0
  4. data/app/assets/config/atlas_engine_manifest.js +3 -0
  5. data/app/assets/stylesheets/atlas_engine/application.css +15 -0
  6. data/app/concerns/atlas_engine/handles_blob.rb +26 -0
  7. data/app/concerns/atlas_engine/handles_interruption.rb +22 -0
  8. data/app/controllers/atlas_engine/application_controller.rb +7 -0
  9. data/app/controllers/atlas_engine/connectivity_controller.rb +21 -0
  10. data/app/controllers/atlas_engine/country_imports_controller.rb +73 -0
  11. data/app/controllers/atlas_engine/graphql_controller.rb +59 -0
  12. data/app/countries/atlas_engine/ar/country_profile.yml +9 -0
  13. data/app/countries/atlas_engine/at/address_importer/corrections/open_address/city_corrector.rb +23 -0
  14. data/app/countries/atlas_engine/at/country_profile.yml +24 -0
  15. data/app/countries/atlas_engine/at/index_configuration.yml +63 -0
  16. data/app/countries/atlas_engine/at/synonyms.yml +6 -0
  17. data/app/countries/atlas_engine/at/validation_transcriber/address_parser.rb +58 -0
  18. data/app/countries/atlas_engine/au/address_importer/open_address/filter.rb +26 -0
  19. data/app/countries/atlas_engine/au/address_importer/open_address/mapper.rb +41 -0
  20. data/app/countries/atlas_engine/au/country_profile.yml +13 -0
  21. data/app/countries/atlas_engine/au/synonyms.yml +209 -0
  22. data/app/countries/atlas_engine/au/validation_transcriber/address_parser.rb +121 -0
  23. data/app/countries/atlas_engine/be/country_profile.yml +12 -0
  24. data/app/countries/atlas_engine/bm/address_importer/corrections/open_address/city_alias_corrector.rb +38 -0
  25. data/app/countries/atlas_engine/bm/address_importer/open_address/mapper.rb +40 -0
  26. data/app/countries/atlas_engine/bm/country_profile.yml +12 -0
  27. data/app/countries/atlas_engine/br/country_profile.yml +4 -0
  28. data/app/countries/atlas_engine/ca/country_profile.yml +7 -0
  29. data/app/countries/atlas_engine/ca/synonyms.yml +1615 -0
  30. data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/city_corrector.rb +29 -0
  31. data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/locale_corrector.rb +74 -0
  32. data/app/countries/atlas_engine/ch/address_importer/open_address/mapper.rb +40 -0
  33. data/app/countries/atlas_engine/ch/country_profile.yml +15 -0
  34. data/app/countries/atlas_engine/ch/locales/de/country_profile.yml +15 -0
  35. data/app/countries/atlas_engine/ch/locales/de/index_configuration.yml +63 -0
  36. data/app/countries/atlas_engine/ch/locales/de/synonyms.yml +7 -0
  37. data/app/countries/atlas_engine/ch/locales/fr/synonyms.yml +21 -0
  38. data/app/countries/atlas_engine/cz/country_profile.yml +6 -0
  39. data/app/countries/atlas_engine/de/country_profile.yml +19 -0
  40. data/app/countries/atlas_engine/de/index_configuration.yml +64 -0
  41. data/app/countries/atlas_engine/de/synonyms.yml +2 -0
  42. data/app/countries/atlas_engine/de/validation_transcriber/address_parser.rb +19 -0
  43. data/app/countries/atlas_engine/dk/country_profile.yml +6 -0
  44. data/app/countries/atlas_engine/dk/synonyms.yml +3 -0
  45. data/app/countries/atlas_engine/dk/validation_transcriber/address_parser.rb +21 -0
  46. data/app/countries/atlas_engine/fo/country_profile.yml +5 -0
  47. data/app/countries/atlas_engine/fr/address_importer/corrections/open_address/city_corrector.rb +28 -0
  48. data/app/countries/atlas_engine/fr/country_profile.yml +13 -0
  49. data/app/countries/atlas_engine/fr/synonyms.yml +21 -0
  50. data/app/countries/atlas_engine/fr/validation_transcriber/address_parser.rb +34 -0
  51. data/app/countries/atlas_engine/gb/address_validation/es/query_builder.rb +98 -0
  52. data/app/countries/atlas_engine/gb/country_profile.yml +10 -0
  53. data/app/countries/atlas_engine/gb/validation_transcriber/full_address_parser.rb +164 -0
  54. data/app/countries/atlas_engine/gb/validation_transcriber/parsed_address.rb +120 -0
  55. data/app/countries/atlas_engine/gg/address_validation/validators/full_address/restrictions/unsupported_city.rb +39 -0
  56. data/app/countries/atlas_engine/gg/country_profile.yml +7 -0
  57. data/app/countries/atlas_engine/ie/country_profile.yml +3 -0
  58. data/app/countries/atlas_engine/it/address_importer/corrections/open_address/city_corrector.rb +27 -0
  59. data/app/countries/atlas_engine/it/address_importer/corrections/open_address/province_corrector.rb +29 -0
  60. data/app/countries/atlas_engine/it/address_importer/open_address/mapper.rb +42 -0
  61. data/app/countries/atlas_engine/it/country_profile.yml +11 -0
  62. data/app/countries/atlas_engine/jp/address_validation/es/data_mapper.rb +63 -0
  63. data/app/countries/atlas_engine/jp/country_profile.yml +6 -0
  64. data/app/countries/atlas_engine/kr/address_importer/open_address/mapper.rb +41 -0
  65. data/app/countries/atlas_engine/kr/country_profile.yml +11 -0
  66. data/app/countries/atlas_engine/li/address_importer/corrections/open_address/city_corrector.rb +25 -0
  67. data/app/countries/atlas_engine/li/country_profile.yml +21 -0
  68. data/app/countries/atlas_engine/li/index_configuration.yml +63 -0
  69. data/app/countries/atlas_engine/li/synonyms.yml +6 -0
  70. data/app/countries/atlas_engine/lt/country_profile.yml +6 -0
  71. data/app/countries/atlas_engine/lt/synonyms.yml +7 -0
  72. data/app/countries/atlas_engine/lt/validation_transcriber/address_parser.rb +24 -0
  73. data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/locale_corrector.rb +54 -0
  74. data/app/countries/atlas_engine/lu/country_profile.yml +12 -0
  75. data/app/countries/atlas_engine/nl/address_importer/corrections/open_address/city_corrector.rb +25 -0
  76. data/app/countries/atlas_engine/nl/country_profile.yml +18 -0
  77. data/app/countries/atlas_engine/nl/index_configuration.yml +52 -0
  78. data/app/countries/atlas_engine/nl/synonyms.yml +92 -0
  79. data/app/countries/atlas_engine/nl/validation_transcriber/address_parser.rb +85 -0
  80. data/app/countries/atlas_engine/no/country_profile.yml +5 -0
  81. data/app/countries/atlas_engine/nz/country_profile.yml +3 -0
  82. data/app/countries/atlas_engine/pl/country_profile.yml +5 -0
  83. data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +19 -0
  84. data/app/countries/atlas_engine/pt/address_importer/corrections/open_address/city_corrector.rb +32 -0
  85. data/app/countries/atlas_engine/pt/address_importer/open_address/mapper.rb +39 -0
  86. data/app/countries/atlas_engine/pt/country_profile.yml +10 -0
  87. data/app/countries/atlas_engine/pt/synonyms.yml +7 -0
  88. data/app/countries/atlas_engine/sa/country_profile.yml +10 -0
  89. data/app/countries/atlas_engine/se/country_profile.yml +5 -0
  90. data/app/countries/atlas_engine/tt/address_importer/open_address/mapper.rb +38 -0
  91. data/app/countries/atlas_engine/tt/country_profile.yml +7 -0
  92. data/app/countries/atlas_engine/us/country_profile.yml +12 -0
  93. data/app/countries/atlas_engine/us/synonyms.yml +350 -0
  94. data/app/graphql/atlas_engine/errors/locale_unsupported_error.rb +17 -0
  95. data/app/graphql/atlas_engine/schema.graphql +1293 -0
  96. data/app/graphql/atlas_engine/schema.rb +23 -0
  97. data/app/graphql/atlas_engine/types/address_validation/address_input.rb +51 -0
  98. data/app/graphql/atlas_engine/types/address_validation/concern_type.rb +20 -0
  99. data/app/graphql/atlas_engine/types/address_validation/enums/concern_enum.rb +15 -0
  100. data/app/graphql/atlas_engine/types/address_validation/field_type.rb +15 -0
  101. data/app/graphql/atlas_engine/types/address_validation/suggestion_type.rb +21 -0
  102. data/app/graphql/atlas_engine/types/base_argument.rb +9 -0
  103. data/app/graphql/atlas_engine/types/base_enum.rb +9 -0
  104. data/app/graphql/atlas_engine/types/base_field.rb +10 -0
  105. data/app/graphql/atlas_engine/types/base_input_object.rb +9 -0
  106. data/app/graphql/atlas_engine/types/base_interface.rb +10 -0
  107. data/app/graphql/atlas_engine/types/base_object.rb +9 -0
  108. data/app/graphql/atlas_engine/types/base_scalar.rb +9 -0
  109. data/app/graphql/atlas_engine/types/base_union.rb +9 -0
  110. data/app/graphql/atlas_engine/types/matching_strategy_type.rb +12 -0
  111. data/app/graphql/atlas_engine/types/mutation_type.rb +9 -0
  112. data/app/graphql/atlas_engine/types/query_type.rb +61 -0
  113. data/app/graphql/atlas_engine/types/validation_supported_country.rb +12 -0
  114. data/app/graphql/atlas_engine/types/validation_type.rb +22 -0
  115. data/app/helpers/atlas_engine/address_importer/import_log_helper.rb +66 -0
  116. data/app/helpers/atlas_engine/application_helper.rb +7 -0
  117. data/app/helpers/atlas_engine/locale_format_helper.rb +40 -0
  118. data/app/helpers/atlas_engine/log_base.rb +32 -0
  119. data/app/helpers/atlas_engine/log_helper.rb +24 -0
  120. data/app/helpers/atlas_engine/metrics_helper.rb +25 -0
  121. data/app/jobs/atlas_engine/address_importer/clear_records_job.rb +39 -0
  122. data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_job.rb +212 -0
  123. data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_launcher_job.rb +67 -0
  124. data/app/jobs/atlas_engine/address_importer/open_address/prepares_geo_json_file.rb +41 -0
  125. data/app/jobs/atlas_engine/address_importer/resumable_import_job.rb +49 -0
  126. data/app/jobs/atlas_engine/address_importer/street_backfill_job.rb +63 -0
  127. data/app/jobs/atlas_engine/application_job.rb +10 -0
  128. data/app/jobs/atlas_engine/concerns/address_importer/handles_errors.rb +43 -0
  129. data/app/lib/atlas_engine/concern_formatter.rb +40 -0
  130. data/app/lib/atlas_engine/restrictions/base.rb +20 -0
  131. data/app/lib/atlas_engine/restrictions/unsupported_script.rb +31 -0
  132. data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +201 -0
  133. data/app/lib/atlas_engine/validation_transcriber/address_parser_factory.rb +27 -0
  134. data/app/lib/atlas_engine/validation_transcriber/address_parser_north_america.rb +39 -0
  135. data/app/lib/atlas_engine/validation_transcriber/address_parser_oceanic.rb +17 -0
  136. data/app/lib/atlas_engine/validation_transcriber/address_parser_preprocessor.rb +132 -0
  137. data/app/lib/atlas_engine/validation_transcriber/address_parsing_helper.rb +38 -0
  138. data/app/lib/atlas_engine/validation_transcriber/address_parsings.rb +54 -0
  139. data/app/lib/atlas_engine/validation_transcriber/constants.rb +50 -0
  140. data/app/lib/atlas_engine/validation_transcriber/english_street_parser.rb +59 -0
  141. data/app/lib/atlas_engine/validation_transcriber/formatter.rb +46 -0
  142. data/app/lib/atlas_engine/validation_transcriber/french_street_parser.rb +50 -0
  143. data/app/lib/atlas_engine/validation_transcriber/province_code_normalizer.rb +45 -0
  144. data/app/lib/atlas_engine/validation_transcriber/street_parser.rb +18 -0
  145. data/app/lib/atlas_engine/validation_transcriber/zip_normalizer.rb +23 -0
  146. data/app/mailers/atlas_engine/application_mailer.rb +9 -0
  147. data/app/models/atlas_engine/address_importer/corrections/corrector.rb +33 -0
  148. data/app/models/atlas_engine/address_importer/import_events_notifier/base.rb +35 -0
  149. data/app/models/atlas_engine/address_importer/import_events_notifier/notifier.rb +26 -0
  150. data/app/models/atlas_engine/address_importer/open_address/default_mapper.rb +46 -0
  151. data/app/models/atlas_engine/address_importer/open_address/feature_helper.rb +110 -0
  152. data/app/models/atlas_engine/address_importer/open_address/filter.rb +17 -0
  153. data/app/models/atlas_engine/address_importer/open_address/loader.rb +27 -0
  154. data/app/models/atlas_engine/address_importer/open_address/transformer.rb +39 -0
  155. data/app/models/atlas_engine/address_importer/open_address.rb +10 -0
  156. data/app/models/atlas_engine/address_importer/validation/base_validator.rb +86 -0
  157. data/app/models/atlas_engine/address_importer/validation/default_validator.rb +27 -0
  158. data/app/models/atlas_engine/address_importer/validation/field_validations/city.rb +47 -0
  159. data/app/models/atlas_engine/address_importer/validation/field_validations/interface.rb +29 -0
  160. data/app/models/atlas_engine/address_importer/validation/field_validations/province.rb +73 -0
  161. data/app/models/atlas_engine/address_importer/validation/field_validations/zip.rb +84 -0
  162. data/app/models/atlas_engine/address_importer/validation/validator.rb +17 -0
  163. data/app/models/atlas_engine/address_importer/validation/wrapper.rb +70 -0
  164. data/app/models/atlas_engine/address_number.rb +36 -0
  165. data/app/models/atlas_engine/address_number_range.rb +200 -0
  166. data/app/models/atlas_engine/address_validation/abstract_address.rb +49 -0
  167. data/app/models/atlas_engine/address_validation/address.rb +47 -0
  168. data/app/models/atlas_engine/address_validation/candidate.rb +109 -0
  169. data/app/models/atlas_engine/address_validation/candidate_tuple.rb +15 -0
  170. data/app/models/atlas_engine/address_validation/concern.rb +74 -0
  171. data/app/models/atlas_engine/address_validation/concern_producer.rb +19 -0
  172. data/app/models/atlas_engine/address_validation/concern_queue.rb +20 -0
  173. data/app/models/atlas_engine/address_validation/concern_record.rb +122 -0
  174. data/app/models/atlas_engine/address_validation/datastore_base.rb +27 -0
  175. data/app/models/atlas_engine/address_validation/errors.rb +13 -0
  176. data/app/models/atlas_engine/address_validation/es/candidate_selector.rb +70 -0
  177. data/app/models/atlas_engine/address_validation/es/data_mappers/decompounding_data_mapper.rb +39 -0
  178. data/app/models/atlas_engine/address_validation/es/data_mappers/default_data_mapper.rb +110 -0
  179. data/app/models/atlas_engine/address_validation/es/datastore.rb +229 -0
  180. data/app/models/atlas_engine/address_validation/es/default_query_builder.rb +30 -0
  181. data/app/models/atlas_engine/address_validation/es/query_builder.rb +160 -0
  182. data/app/models/atlas_engine/address_validation/es/term_vectors.rb +78 -0
  183. data/app/models/atlas_engine/address_validation/es/validators/full_address.rb +123 -0
  184. data/app/models/atlas_engine/address_validation/es/validators/full_address_street.rb +18 -0
  185. data/app/models/atlas_engine/address_validation/es/validators/restriction_evaluator.rb +37 -0
  186. data/app/models/atlas_engine/address_validation/field.rb +30 -0
  187. data/app/models/atlas_engine/address_validation/full_address_validator_base.rb +27 -0
  188. data/app/models/atlas_engine/address_validation/log_emitter.rb +66 -0
  189. data/app/models/atlas_engine/address_validation/matching_strategies.rb +16 -0
  190. data/app/models/atlas_engine/address_validation/normalizer.rb +38 -0
  191. data/app/models/atlas_engine/address_validation/predicate_pipeline.rb +80 -0
  192. data/app/models/atlas_engine/address_validation/request.rb +12 -0
  193. data/app/models/atlas_engine/address_validation/result.rb +154 -0
  194. data/app/models/atlas_engine/address_validation/runs_validation.rb +16 -0
  195. data/app/models/atlas_engine/address_validation/session.rb +47 -0
  196. data/app/models/atlas_engine/address_validation/statsd_emitter.rb +72 -0
  197. data/app/models/atlas_engine/address_validation/strategies.rb +10 -0
  198. data/app/models/atlas_engine/address_validation/suggestion.rb +97 -0
  199. data/app/models/atlas_engine/address_validation/token/comparator.rb +44 -0
  200. data/app/models/atlas_engine/address_validation/token/comparison.rb +76 -0
  201. data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +158 -0
  202. data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +166 -0
  203. data/app/models/atlas_engine/address_validation/token/sequence.rb +147 -0
  204. data/app/models/atlas_engine/address_validation/token/synonyms.rb +77 -0
  205. data/app/models/atlas_engine/address_validation/token.rb +113 -0
  206. data/app/models/atlas_engine/address_validation/validator.rb +147 -0
  207. data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +97 -0
  208. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +164 -0
  209. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result_base.rb +46 -0
  210. data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +135 -0
  211. data/app/models/atlas_engine/address_validation/validators/full_address/components_to_validate.rb +88 -0
  212. data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +127 -0
  213. data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +23 -0
  214. data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_concern_builder.rb +42 -0
  215. data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_country_concern.rb +37 -0
  216. data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_province_concern.rb +37 -0
  217. data/app/models/atlas_engine/address_validation/validators/full_address/no_candidate_result.rb +26 -0
  218. data/app/models/atlas_engine/address_validation/validators/full_address/number_comparison.rb +31 -0
  219. data/app/models/atlas_engine/address_validation/validators/full_address/postal_code_matcher.rb +60 -0
  220. data/app/models/atlas_engine/address_validation/validators/full_address/result_updater.rb +42 -0
  221. data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +140 -0
  222. data/app/models/atlas_engine/address_validation/validators/full_address/unknown_address_concern.rb +30 -0
  223. data/app/models/atlas_engine/address_validation/validators/full_address/unknown_province_concern.rb +38 -0
  224. data/app/models/atlas_engine/address_validation/validators/full_address/unknown_zip_for_address_concern.rb +32 -0
  225. data/app/models/atlas_engine/address_validation/validators/full_address/unmatched_field_concern.rb +84 -0
  226. data/app/models/atlas_engine/address_validation/validators/full_address/unsupported_script_result.rb +22 -0
  227. data/app/models/atlas_engine/address_validation/validators/predicates/cache.rb +38 -0
  228. data/app/models/atlas_engine/address_validation/validators/predicates/city/present.rb +36 -0
  229. data/app/models/atlas_engine/address_validation/validators/predicates/country/exists.rb +34 -0
  230. data/app/models/atlas_engine/address_validation/validators/predicates/country/valid_for_zip.rb +60 -0
  231. data/app/models/atlas_engine/address_validation/validators/predicates/no_emojis.rb +38 -0
  232. data/app/models/atlas_engine/address_validation/validators/predicates/no_html_tags.rb +39 -0
  233. data/app/models/atlas_engine/address_validation/validators/predicates/no_url.rb +38 -0
  234. data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_length.rb +34 -0
  235. data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_token_count.rb +63 -0
  236. data/app/models/atlas_engine/address_validation/validators/predicates/phone/valid.rb +41 -0
  237. data/app/models/atlas_engine/address_validation/validators/predicates/predicate.rb +37 -0
  238. data/app/models/atlas_engine/address_validation/validators/predicates/province/exists.rb +43 -0
  239. data/app/models/atlas_engine/address_validation/validators/predicates/province/valid_for_country.rb +48 -0
  240. data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1.rb +45 -0
  241. data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +43 -0
  242. data/app/models/atlas_engine/address_validation/validators/predicates/street/present.rb +35 -0
  243. data/app/models/atlas_engine/address_validation/validators/predicates/zip/present.rb +58 -0
  244. data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_country.rb +45 -0
  245. data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_province.rb +55 -0
  246. data/app/models/atlas_engine/address_validation/validators/predicates/zip/zip_base.rb +25 -0
  247. data/app/models/atlas_engine/address_validation/zip_truncator.rb +32 -0
  248. data/app/models/atlas_engine/application_record.rb +8 -0
  249. data/app/models/atlas_engine/coded_error.rb +18 -0
  250. data/app/models/atlas_engine/coded_errors.rb +17 -0
  251. data/app/models/atlas_engine/country_import.rb +44 -0
  252. data/app/models/atlas_engine/country_profile.rb +270 -0
  253. data/app/models/atlas_engine/country_profile_ingestion_subset.rb +42 -0
  254. data/app/models/atlas_engine/country_profile_subset_base.rb +22 -0
  255. data/app/models/atlas_engine/country_profile_validation_subset.rb +48 -0
  256. data/app/models/atlas_engine/country_repository.rb +110 -0
  257. data/app/models/atlas_engine/elasticsearch/client.rb +116 -0
  258. data/app/models/atlas_engine/elasticsearch/client_interface.rb +89 -0
  259. data/app/models/atlas_engine/elasticsearch/repository.rb +246 -0
  260. data/app/models/atlas_engine/elasticsearch/repository_interface.rb +82 -0
  261. data/app/models/atlas_engine/elasticsearch/response.rb +20 -0
  262. data/app/models/atlas_engine/event.rb +12 -0
  263. data/app/models/atlas_engine/field_decompounder.rb +36 -0
  264. data/app/models/atlas_engine/index_configuration_factory.rb +188 -0
  265. data/app/models/atlas_engine/post_address.rb +114 -0
  266. data/app/models/atlas_engine/post_address_importer.rb +34 -0
  267. data/app/models/atlas_engine/services/service_helper.rb +21 -0
  268. data/app/models/atlas_engine/services/validation.rb +65 -0
  269. data/app/models/atlas_engine/services/validation_eligibility.rb +18 -0
  270. data/app/models/atlas_engine/street.rb +34 -0
  271. data/app/tasks/maintenance/atlas_engine/elasticsearch_index_create_task.rb +106 -0
  272. data/app/tasks/maintenance/atlas_engine/geo_json_import_task.rb +29 -0
  273. data/app/views/atlas_engine/connectivity/index.html.erb +50 -0
  274. data/app/views/atlas_engine/country_imports/index.html.erb +49 -0
  275. data/app/views/atlas_engine/country_imports/show.html.erb +73 -0
  276. data/app/views/layouts/atlas_engine/application.html.erb +15 -0
  277. data/config/initializers/1.ruby_patches.rb +18 -0
  278. data/config/initializers/sorbet.rb +5 -0
  279. data/config/initializers/worldwide.rb +5 -0
  280. data/config/locales/internal/en.yml +14 -0
  281. data/config/routes.rb +17 -0
  282. data/db/data/address_synonyms/index_configurations/default.yml +141 -0
  283. data/db/data/country_profiles/default.yml +23 -0
  284. data/db/data/transcriber.yml +760 -0
  285. data/db/data/validation_pipelines/es.yml +58 -0
  286. data/db/data/validation_pipelines/es_street.yml +58 -0
  287. data/db/data/validation_pipelines/local.yml +60 -0
  288. data/db/migrate/20230919173037_create_atlas_engine_post_addresses.rb +25 -0
  289. data/db/migrate/20231117142735_add_building_and_unit_ranges_column.rb +7 -0
  290. data/db/migrate/20231117143536_create_atlas_engine_country_imports.rb +11 -0
  291. data/db/migrate/20231117145844_create_atlas_engine_events_table.rb +13 -0
  292. data/db/migrate/20231123153554_add_unique_index_to_atlas_engine_post_addresses.rb +14 -0
  293. data/db/migrate/20231123154658_add_index_to_post_addresses_on_source_id_locale_country_code.rb +12 -0
  294. data/lib/atlas_engine/engine.rb +10 -0
  295. data/lib/atlas_engine/version.rb +6 -0
  296. data/lib/atlas_engine.rb +66 -0
  297. data/lib/tasks/atlas_engine/address_importer.rake +20 -0
  298. metadata +553 -0
@@ -0,0 +1,229 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressValidation
    module Es
      # Elasticsearch-backed datastore used while validating a single address.
      #
      # It exposes three kinds of data for the address given at construction:
      # the analyzed city token sequence, the analyzed street token sequences,
      # and the full-address candidates returned by the search query.
      #
      # City and street sequences are held in Concurrent::Promises futures. The
      # `*_async` methods start the work in the background; the matching
      # `fetch_*` methods join that future, or run the request inline when no
      # future was submitted beforehand.
      class Datastore
        include MetricsHelper
        include LogHelper
        include DatastoreBase
        extend T::Sig

        attr_reader :parsings
        attr_writer :candidates # meant for test setup only

        # @param address [AbstractAddress] address to validate; must have a country code
        # @param locale [String, nil] locale of the address; mandatory for multi-locale countries
        # @raise [ArgumentError] when the address has no country code, or when the
        #   country is multi-locale and no locale was given
        sig { params(address: AbstractAddress, locale: T.nilable(String)).void }
        def initialize(address:, locale: nil)
          @address = address
          @locale = locale

          raise ArgumentError, "address has no country_code" if address.country_code.blank?

          @country_code = address.country_code.to_s

          # Resolve the profile with the same locale that the query builder and
          # the address parsings below receive. Without it, the normalized
          # components and decompounding settings read from `profile` later on
          # would come from the locale-less profile of the country.
          @profile = CountryProfile.for(country_code.upcase, locale)

          if locale.nil? && @profile.validation.multi_locale?
            raise ArgumentError, "#{country_code} is a multi-locale country and requires a locale"
          end

          @parsings = ValidationTranscriber::AddressParsings.new(address_input: address, locale: locale)
          @query_builder = QueryBuilder.for(address, locale)
        end

        # Lazily builds (and memoizes) the repository used for every request
        # issued by this datastore. Country code and locale are downcased.
        sig do
          returns(CountryRepository)
        end
        def repository
          @repository ||= CountryRepository.new(
            country_code: country_code.downcase,
            repository_class: AtlasEngine.elasticsearch_repository.constantize,
            locale: locale&.downcase,
            index_configuration: nil,
          )
        end

        # Injects an already-known city sequence, stored as a fulfilled future
        # so that `fetch_city_sequence` returns it without issuing a request.
        sig { params(sequence: Token::Sequence).void }
        def city_sequence=(sequence)
          @city_sequence_future = Concurrent::Promises.fulfilled_future(sequence)
        end

        # Returns the analyzed city sequence, joining the pending future when
        # one exists and otherwise fetching inline.
        sig { override.returns(Token::Sequence) }
        def fetch_city_sequence
          # Report the state of the future *before* joining, so that callers
          # that did not prefetch (or whose prefetch is still running) are visible.
          log_future_state_on_join(future: @city_sequence_future, method: "city_sequence")

          @city_sequence_future ||= Concurrent::Promises.fulfilled_future(fetch_city_sequence_internal)

          @city_sequence_future.value!
        end

        # Starts fetching the city sequence in the background, unless a future
        # for it already exists. Returns the (possibly pre-existing) future.
        sig { returns(Concurrent::Promises::Future) }
        def fetch_city_sequence_async
          submit_time = Time.current

          @city_sequence_future ||= Concurrent::Promises.future do
            measure_future_queue_time(enqueue_time: submit_time, method: "city_sequence")
            fetch_city_sequence_internal
          end
        end

        # Injects already-known street sequences, stored as a fulfilled future
        # so that `fetch_street_sequences` returns them without issuing requests.
        sig { params(sequences: T::Array[Token::Sequence]).void }
        def street_sequences=(sequences)
          @street_sequences_future = Concurrent::Promises.fulfilled_future(sequences)
        end

        # Returns one analyzed sequence per potential street of the address,
        # joining the pending future when one exists and otherwise fetching inline.
        sig { override.returns(T::Array[Token::Sequence]) }
        def fetch_street_sequences
          log_future_state_on_join(future: @street_sequences_future, method: "all_street_sequences")

          @street_sequences_future ||= Concurrent::Promises.fulfilled_future(fetch_street_sequences_internal)

          @street_sequences_future.value!
        end

        # Starts fetching the street sequences in the background, unless a
        # future for them already exists. Returns the (possibly pre-existing) future.
        sig { returns(Concurrent::Promises::Future) }
        def fetch_street_sequences_async
          submit_time = Time.current

          @street_sequences_future ||= Concurrent::Promises.future do
            measure_future_queue_time(enqueue_time: submit_time, method: "all_street_sequences")
            fetch_street_sequences_internal
          end
        end

        # Runs the full-address query once and memoizes the resulting
        # candidates. Term vectors are attached only when candidates were found.
        sig { override.returns(T::Array[Candidate]) }
        def fetch_full_address_candidates
          @candidates ||= fetch_addresses_internal.map { |address| Candidate.from(address) }.tap do |candidates|
            assign_term_vectors_to_candidates(candidates) if candidates.present?
          end
        end

        # Returns the raw search hits under the :body key.
        # NOTE: this issues its own search request; it does not reuse the
        # memoized candidates.
        sig { override.returns(Hash) }
        def validation_response
          {
            body: fetch_addresses_internal,
          }
        end

        private

        attr_reader :address, :country_code, :locale, :profile, :query_builder

        # Sends the city to the :city_analyzer and wraps the resulting tokens,
        # together with the raw city value, in a Token::Sequence.
        sig { returns(Token::Sequence) }
        def fetch_city_sequence_internal
          city_value = address.city
          request = {
            analyzer: :city_analyzer,
            text: city_value,
          }

          measure_es_validation_request_time(method: "city_sequence") do
            tokens = repository.analyze(request).map do |token|
              Token.from_analyze(token)
            end
            Token::Sequence.new(tokens: tokens, raw_value: city_value)
          end
        end

        # Executes the full-address query built by the country's query builder.
        sig { returns(T::Array[T::Hash[String, T.untyped]]) }
        def fetch_addresses_internal
          measure_es_validation_request_time(method: "full_address_candidates") do
            repository.search(query_builder.full_address_query)
          end
        end

        # Analyzes every potential street with the :street_analyzer. The outer
        # measurement covers the whole batch ("all_street_sequences"), the
        # inner one each individual request ("street_sequence").
        sig { returns(T::Array[Token::Sequence]) }
        def fetch_street_sequences_internal
          measure_es_validation_request_time(method: "all_street_sequences") do
            @parsings.potential_streets.map do |street_address_value|
              request = {
                analyzer: :street_analyzer,
                text: prepare_street_for_analysis(street_address_value),
              }

              measure_es_validation_request_time(method: "street_sequence") do
                tokens = repository.analyze(request).map do |token|
                  Token.from_analyze(token)
                end
                # raw_value keeps the street as parsed, not the decompounded text sent for analysis.
                Token::Sequence.new(tokens: tokens, raw_value: street_address_value)
              end
            end
          end
        end

        # Fetches term vectors for the candidates and hands them to TermVectors
        # to set the candidates' sequences. Skipped entirely when the profile
        # declares no normalized components.
        sig { params(candidates: T::Array[Candidate]).void }
        def assign_term_vectors_to_candidates(candidates)
          return if profile.validation.normalized_components.blank?

          candidate_term_vectors = measure_es_validation_request_time(method: "term_vectors") do
            repository.term_vectors(term_vectors_query(candidates))
          end

          TermVectors.new(term_vectors_hashes: candidate_term_vectors, candidates: candidates).set_candidate_sequences
        end

        # Builds the term-vectors request for the given candidates, restricted
        # to the profile's normalized components. The hash is keyed by symbols,
        # which the signature now reflects.
        sig { params(candidates: T::Array[Candidate]).returns(T::Hash[Symbol, T.untyped]) }
        def term_vectors_query(candidates)
          {
            ids: candidates.map(&:id),
            parameters: {
              fields: profile.validation.normalized_components,
              field_statistics: false,
            },
          }
        end

        # Wraps the block in a distribution measurement tagged with the country
        # and the given method name, and returns whatever measure_distribution returns.
        sig { params(method: String, block: T.proc.returns(T.untyped)).returns(T.untyped) }
        def measure_es_validation_request_time(method:, &block)
          measure_distribution(
            name: "AddressValidation.elasticsearch_request_time_dist",
            tags: [
              "country:#{country_code}",
              "method:#{method}",
            ],
            &block
          )
        end

        # Emits the state of the future about to be joined (:unsubmitted when
        # there is none) and logs a warning whenever it is not yet fulfilled.
        sig { params(future: T.nilable(Concurrent::Promises::Future), method: String).void }
        def log_future_state_on_join(future:, method:)
          state = future&.state || :unsubmitted
          log_warn("Joining with #{state} future, method: #{method}") unless state == :fulfilled

          StatsD.increment(
            "AddressValidation.elasticsearch_future_state",
            sample_rate: 1.0,
            tags: {
              country: country_code,
              method:,
              state:,
            },
          )
        end

        # Reports the time elapsed between submitting a future (enqueue_time)
        # and the moment this method is called from inside the future's body.
        sig { params(enqueue_time: ActiveSupport::TimeWithZone, method: String).void }
        def measure_future_queue_time(enqueue_time:, method:)
          StatsD.distribution(
            "AddressValidation.elasticsearch_future_queue_time",
            Time.current - enqueue_time,
            tags: [
              "country:#{country_code}",
              "method:#{method}",
            ],
          )
        end

        # Runs the street value through FieldDecompounder, using the country
        # profile, before it is sent to the analyzer.
        sig { params(street_value: String).returns(String) }
        def prepare_street_for_analysis(street_value)
          T.must(
            FieldDecompounder.new(
              field: :street,
              value: street_value,
              country_profile: profile,
            ).call,
          )
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressValidation
    module Es
      # Query builder whose full-address query is a single bool/should over the
      # building number, street, city, zip and province clauses.
      class DefaultQueryBuilder < QueryBuilder
        # Assembles the query from every clause that is not nil.
        sig { override.returns(T::Hash[String, T.untyped]) }
        def full_address_query
          should_clauses = [
            building_number_clause,
            street_clause,
            city_clause,
            zip_clause,
            province_clause,
          ].compact

          # Up to two clauses may miss, but at least two must always match.
          required_matches = [should_clauses.count - 2, 2].max

          bool_query = {
            "should" => should_clauses,
            "minimum_should_match" => required_matches,
          }

          { "query" => { "bool" => bool_query } }
        end
      end
    end
  end
end
@@ -0,0 +1,160 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressValidation
    module Es
      # Abstract base for Elasticsearch address query builders. Subclasses
      # implement #full_address_query; the private *_clause helpers below build
      # the individual query fragments from the address being validated.
      class QueryBuilder
        extend T::Helpers
        extend T::Sig

        abstract!
        class << self
          extend T::Sig

          # Instantiates the builder class named under validation.query_builder
          # in the country profile of the address's country.
          sig { params(address: AbstractAddress, locale: T.nilable(String)).returns(QueryBuilder) }
          def for(address, locale = nil)
            country_profile = CountryProfile.for(T.must(address.country_code), locale)
            builder_class_name = country_profile.attributes.dig("validation", "query_builder")
            builder_class_name.constantize.new(address, locale)
          end
        end

        sig { params(address: AbstractAddress, locale: T.nilable(String)).void }
        def initialize(address, locale = nil)
          @address = address
          @profile = CountryProfile.for(T.must(address.country_code), locale)
          @parsings = ValidationTranscriber::AddressParsings.new(address_input: address, locale: locale)
        end

        sig { abstract.returns(T::Hash[String, T.untyped]) }
        def full_address_query; end

        private

        sig { returns(AbstractAddress) }
        attr_reader :address

        sig { returns(CountryProfile) }
        attr_reader :profile

        # dis_max over one term query per distinct parsed building number,
        # followed by a query matching documents without any building range.
        sig { returns(Hash) }
        def building_number_clause
          building_numbers = @parsings.potential_building_numbers.filter_map do |raw_number|
            AddressNumber.new(value: raw_number).to_i
          end.uniq

          number_queries = building_numbers.map { |number| approx_building_clause(number) }
          number_queries << empty_approx_building_clause

          { "dis_max" => { "queries" => number_queries } }
        end

        # Term query on approx_building_ranges for a single building number.
        sig { params(value: Integer).returns(Hash) }
        def approx_building_clause(value)
          range_term = { "approx_building_ranges" => { "value" => value } }
          { "term" => range_term }
        end

        # Matches documents where approx_building_ranges does not exist.
        sig { returns(Hash) }
        def empty_approx_building_clause
          missing_range = { "exists" => { "field" => "approx_building_ranges" } }
          { "bool" => { "must_not" => missing_range } }
        end

        # dis_max over fuzzy matches on the street field and on the
        # street_stripped field; duplicate queries are removed by the union.
        sig { returns(Hash) }
        def street_clause
          street_matches = street_query_values.map do |value|
            { "match" => { "street" => { "query" => value, "fuzziness" => "auto" } } }
          end
          stripped_matches = stripped_street_query_values.map do |value|
            { "match" => { "street_stripped" => { "query" => value, "fuzziness" => "auto" } } }
          end

          { "dis_max" => { "queries" => street_matches.union(stripped_matches) } }
        end

        # Parsed street names when there are any, otherwise the non-blank,
        # distinct address lines.
        sig { returns(T::Array[String]) }
        def street_query_values
          parsed_names = street_names
          return parsed_names if parsed_names.present?

          [address.address1, address.address2].map(&:to_s).compact_blank.uniq
        end

        # Parsed streets followed by their stripped-name variants, deduplicated.
        sig { returns(T::Array[String]) }
        def street_names
          parsed_streets = @parsings.potential_streets
          stripped_variants = parsed_streets.map { |street| Street.new(street: street).with_stripped_name }
          (parsed_streets + stripped_variants).uniq
        end

        # Distinct stripped-name variants of the parsed streets.
        sig { returns(T::Array[String]) }
        def stripped_street_query_values
          stripped_variants = @parsings.potential_streets.map do |street|
            Street.new(street: street).with_stripped_name
          end
          stripped_variants.uniq
        end

        # Nested fuzzy match of the address city against city_aliases.alias.
        sig { returns(T.nilable(Hash)) }
        def city_clause
          alias_match = {
            "match" => {
              "city_aliases.alias" => { "query" => address.city.to_s, "fuzziness" => "auto" },
            },
          }

          { "nested" => { "path" => "city_aliases", "query" => alias_match } }
        end

        # Fuzzy match on the zip field using the normalized zip.
        sig { returns(Hash) }
        def zip_clause
          zip_query = ValidationTranscriber::ZipNormalizer.normalize(
            country_code: address.country_code,
            zip: address.zip,
          )

          { "match" => { "zip" => { "query" => zip_query, "fuzziness" => "auto" } } }
        end

        # Term query on the downcased province code; nil unless the country
        # profile sets validation.has_provinces.
        sig { returns(T.nilable(Hash)) }
        def province_clause
          return unless profile.attributes.dig("validation", "has_provinces")

          { "term" => { "province_code" => { "value" => address.province_code.to_s.downcase } } }
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressValidation
    module Es
      # Applies term-vector results to candidates: for each result that matches
      # a candidate by id, the tokens of every returned field are turned into
      # token sequences and assigned to the corresponding candidate component.
      class TermVectors
        extend T::Sig

        sig do
          params(
            term_vectors_hashes: T::Array[T::Hash[String, T::Hash[String, T.untyped]]],
            candidates: T::Array[Candidate],
          ).void
        end
        def initialize(term_vectors_hashes:, candidates:)
          @term_vectors_hashes = term_vectors_hashes
          @candidates = candidates
        end

        # Sets the sequences on the components of every candidate that has a
        # term-vector result. Results whose "_id" matches no candidate are
        # skipped. Called for its side effects only, hence `each` over `map`.
        sig { void }
        def set_candidate_sequences
          candidates_by_id = candidates.index_by(&:id)

          term_vectors_hashes.each do |candidate_result|
            candidate = candidates_by_id[candidate_result["_id"]]

            next if candidate.nil?

            candidate_result["term_vectors"].each do |component_name, terms_hash|
              component_name = component_name.delete_suffix("_decompounded")
              # city values are indexed as city_aliases.alias, but Atlas still uses :city as the component name
              component_name = "city" if component_name == "city_aliases.alias"
              component = candidate.component(component_name.to_sym)
              sorted_tokens = Token.from_field_term_vector(terms_hash)
              set_sequences(component, sorted_tokens)
            end
          end
        end

        private

        attr_reader :term_vectors_hashes, :candidates

        # Splits the tokens into runs of consecutive tokens and assigns one
        # Token::Sequence per run to the component. The n-th run is paired with
        # the n-th entry of component.values as its raw value.
        sig do
          params(component: Candidate::Component, sorted_tokens: T::Array[Token]).void
        end
        def set_sequences(component, sorted_tokens)
          grouped_tokens = split_tokens_by_position(sorted_tokens)
          component.sequences = grouped_tokens.map.with_index do |sequence_tokens, value_index|
            # ES' offsets are set as if all tokens are part of one long sequence;
            # we adjust the offsets to be relative to the start of each sequence
            # and renumber positions from zero. Tokens are mutated in place.
            offset = T.must(sequence_tokens.first).start_offset
            sequence_tokens.each_with_index do |token, i|
              token.start_offset -= offset
              token.end_offset -= offset
              token.position = i
            end
            Token::Sequence.new(
              tokens: sequence_tokens,
              raw_value: component.values[value_index],
            )
          end
        end

        # Groups adjacent tokens for as long as each token precedes the next one.
        sig do
          params(tokens: T::Array[Token])
            .returns(T::Enumerable[T::Array[Token]])
        end
        def split_tokens_by_position(tokens)
          tokens.chunk_while do |token, next_token|
            token.preceeds?(next_token)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,123 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressValidation
    module Es
      module Validators
        # Full-address validator backed by Elasticsearch candidates. It selects
        # the best candidate across the country's index locales, wraps the
        # outcome in a candidate result object, lets that object update the
        # validation result, and publishes a completion notification.
        class FullAddress < FullAddressValidatorBase
          include LogHelper

          attr_reader :address, :result
          attr_accessor :session

          sig { params(address: TAddress, result: Result).void }
          def initialize(address:, result: Result.new)
            super
            @session = T.let(Session.new(address: address, matching_strategy: MatchingStrategies::Es), Session)
          end

          # Returns the result untouched when existing concerns preclude
          # validation; otherwise updates it from the candidate result and
          # publishes the completion notification.
          sig { override.returns(Result) }
          def validate
            return result if concerns_preclude_validation

            candidate_result = build_candidate_result
            candidate_result.update_result
            publish_notification(candidate_result: candidate_result)
            result
          end

          # Picks the candidate result type: unsupported script, no candidate,
          # or a result wrapping the best candidate.
          sig { returns(AddressValidation::Validators::FullAddress::CandidateResultBase) }
          def build_candidate_result
            unless supported_address?(address)
              return AddressValidation::Validators::FullAddress::UnsupportedScriptResult.new(session:, result:)
            end

            if best_candidate.nil?
              AddressValidation::Validators::FullAddress::NoCandidateResult.new(session:, result:)
            else
              AddressValidation::Validators::FullAddress::CandidateResult.new(
                candidate: T.must(best_candidate),
                result: result,
                session: session,
              )
            end
          end

          private

          # Starts one candidate lookup per index locale (or a single
          # locale-less lookup when the profile lists none) and returns the
          # minimum of the non-nil results. Because of `||=`, a nil outcome is
          # not cached and a later call runs the lookups again.
          sig { returns(T.nilable(CandidateTuple)) }
          def best_candidate
            @best_candidate ||= T.let(
              begin
                index_locales = CountryProfile.for(address.country_code).validation.index_locales

                candidate_futures = if index_locales.empty?
                  [best_candidate_future]
                else
                  index_locales.map { |locale| best_candidate_future(locale) }
                end

                candidate_futures.filter_map(&:value!).min
              ensure
                # We want our futures to complete even when we do not consume their value.
                # The non-raising `wait` is used so that a rejected future does not abort
                # this loop before the remaining futures have been awaited; any error
                # raised by `value!` above still propagates to the caller.
                candidate_futures&.each(&:wait)
              end,
              T.nilable(CandidateTuple),
            )
          end

          # Starts an asynchronous best-candidate lookup against the session's
          # datastore for the given locale.
          sig { params(locale: T.nilable(String)).returns(Concurrent::Promises::Future) }
          def best_candidate_future(locale = nil)
            AddressValidation::Es::CandidateSelector.new(
              datastore: session.datastore(locale: locale),
              address: session.address,
            ).best_candidate_async
          end

          # Instruments the validation_completed event; nil payload entries are dropped.
          sig do
            params(candidate_result: T.nilable(AddressValidation::Validators::FullAddress::CandidateResultBase))
              .returns(T.untyped)
          end
          def publish_notification(candidate_result: nil)
            ActiveSupport::Notifications.instrument("atlas-engine.address_validation.validation_completed", {
              candidate_result: candidate_result,
              result: result,
            }.compact)
          end

          sig { returns(T::Boolean) }
          def concerns_preclude_validation
            has_error_concerns? || exceeds_max_token_length?
          end

          # True when any error-type concern refers to one of the address fields listed below.
          sig { returns(T::Boolean) }
          def has_error_concerns?
            error_concerns = result.concerns.select { |concern| concern.type == Concern::TYPES[:error] }
            error_concerns.flat_map(&:field_names).intersect?([
              :country,
              :province,
              :city,
              :zip,
              :address1,
              :address2,
            ])
          end

          # True when a "too many words" concern exists for address1 or address2.
          sig { returns(T::Boolean) }
          def exceeds_max_token_length?
            result.concerns.flat_map(&:code).intersect?([
              :address1_contains_too_many_words,
              :address2_contains_too_many_words,
            ])
          end

          sig { params(address: TAddress).returns(T::Boolean) }
          def supported_address?(address)
            RestrictionEvaluator.new(address).supported_address?
          end
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressValidation
    module Es
      module Validators
        # FullAddress variant whose session uses the EsStreet matching strategy.
        class FullAddressStreet < FullAddress
          sig { params(address: TAddress, result: Result).void }
          def initialize(address:, result: Result.new)
            super
            # Replace the session created by the parent initializer.
            street_session = Session.new(address: address, matching_strategy: MatchingStrategies::EsStreet)
            @session = T.let(street_session, Session)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressValidation
    module Es
      module Validators
        # Decides whether an address is supported by checking it against the
        # restrictions listed under validation.restrictions in its country profile.
        class RestrictionEvaluator
          extend T::Sig
          attr_reader :address

          sig { params(address: AtlasEngine::AddressValidation::AbstractAddress).void }
          def initialize(address)
            @address = address
          end

          # Returns true when no configured restriction applies to the address.
          # Each restriction entry names a class (under "class") that answers
          # apply? and may carry extra "params", which are passed through with
          # symbolized keys. A profile without a restrictions entry is treated
          # as having no restrictions.
          sig { returns(T::Boolean) }
          def supported_address?
            country_profile = CountryProfile.for(T.must(address.country_code))
            restrictions = country_profile.attributes.dig("validation", "restrictions") || []

            # none? stops at the first restriction that applies.
            restrictions.none? do |restriction|
              additional_params = restriction["params"]&.transform_keys(&:to_sym)

              arguments = { address: address }
              arguments[:params] = additional_params if additional_params.present?

              restriction["class"].constantize.send(:apply?, **arguments)
            end
          end
        end
      end
    end
  end
end