atlas_engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (298) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +123 -0
  3. data/Rakefile +20 -0
  4. data/app/assets/config/atlas_engine_manifest.js +3 -0
  5. data/app/assets/stylesheets/atlas_engine/application.css +15 -0
  6. data/app/concerns/atlas_engine/handles_blob.rb +26 -0
  7. data/app/concerns/atlas_engine/handles_interruption.rb +22 -0
  8. data/app/controllers/atlas_engine/application_controller.rb +7 -0
  9. data/app/controllers/atlas_engine/connectivity_controller.rb +21 -0
  10. data/app/controllers/atlas_engine/country_imports_controller.rb +73 -0
  11. data/app/controllers/atlas_engine/graphql_controller.rb +59 -0
  12. data/app/countries/atlas_engine/ar/country_profile.yml +9 -0
  13. data/app/countries/atlas_engine/at/address_importer/corrections/open_address/city_corrector.rb +23 -0
  14. data/app/countries/atlas_engine/at/country_profile.yml +24 -0
  15. data/app/countries/atlas_engine/at/index_configuration.yml +63 -0
  16. data/app/countries/atlas_engine/at/synonyms.yml +6 -0
  17. data/app/countries/atlas_engine/at/validation_transcriber/address_parser.rb +58 -0
  18. data/app/countries/atlas_engine/au/address_importer/open_address/filter.rb +26 -0
  19. data/app/countries/atlas_engine/au/address_importer/open_address/mapper.rb +41 -0
  20. data/app/countries/atlas_engine/au/country_profile.yml +13 -0
  21. data/app/countries/atlas_engine/au/synonyms.yml +209 -0
  22. data/app/countries/atlas_engine/au/validation_transcriber/address_parser.rb +121 -0
  23. data/app/countries/atlas_engine/be/country_profile.yml +12 -0
  24. data/app/countries/atlas_engine/bm/address_importer/corrections/open_address/city_alias_corrector.rb +38 -0
  25. data/app/countries/atlas_engine/bm/address_importer/open_address/mapper.rb +40 -0
  26. data/app/countries/atlas_engine/bm/country_profile.yml +12 -0
  27. data/app/countries/atlas_engine/br/country_profile.yml +4 -0
  28. data/app/countries/atlas_engine/ca/country_profile.yml +7 -0
  29. data/app/countries/atlas_engine/ca/synonyms.yml +1615 -0
  30. data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/city_corrector.rb +29 -0
  31. data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/locale_corrector.rb +74 -0
  32. data/app/countries/atlas_engine/ch/address_importer/open_address/mapper.rb +40 -0
  33. data/app/countries/atlas_engine/ch/country_profile.yml +15 -0
  34. data/app/countries/atlas_engine/ch/locales/de/country_profile.yml +15 -0
  35. data/app/countries/atlas_engine/ch/locales/de/index_configuration.yml +63 -0
  36. data/app/countries/atlas_engine/ch/locales/de/synonyms.yml +7 -0
  37. data/app/countries/atlas_engine/ch/locales/fr/synonyms.yml +21 -0
  38. data/app/countries/atlas_engine/cz/country_profile.yml +6 -0
  39. data/app/countries/atlas_engine/de/country_profile.yml +19 -0
  40. data/app/countries/atlas_engine/de/index_configuration.yml +64 -0
  41. data/app/countries/atlas_engine/de/synonyms.yml +2 -0
  42. data/app/countries/atlas_engine/de/validation_transcriber/address_parser.rb +19 -0
  43. data/app/countries/atlas_engine/dk/country_profile.yml +6 -0
  44. data/app/countries/atlas_engine/dk/synonyms.yml +3 -0
  45. data/app/countries/atlas_engine/dk/validation_transcriber/address_parser.rb +21 -0
  46. data/app/countries/atlas_engine/fo/country_profile.yml +5 -0
  47. data/app/countries/atlas_engine/fr/address_importer/corrections/open_address/city_corrector.rb +28 -0
  48. data/app/countries/atlas_engine/fr/country_profile.yml +13 -0
  49. data/app/countries/atlas_engine/fr/synonyms.yml +21 -0
  50. data/app/countries/atlas_engine/fr/validation_transcriber/address_parser.rb +34 -0
  51. data/app/countries/atlas_engine/gb/address_validation/es/query_builder.rb +98 -0
  52. data/app/countries/atlas_engine/gb/country_profile.yml +10 -0
  53. data/app/countries/atlas_engine/gb/validation_transcriber/full_address_parser.rb +164 -0
  54. data/app/countries/atlas_engine/gb/validation_transcriber/parsed_address.rb +120 -0
  55. data/app/countries/atlas_engine/gg/address_validation/validators/full_address/restrictions/unsupported_city.rb +39 -0
  56. data/app/countries/atlas_engine/gg/country_profile.yml +7 -0
  57. data/app/countries/atlas_engine/ie/country_profile.yml +3 -0
  58. data/app/countries/atlas_engine/it/address_importer/corrections/open_address/city_corrector.rb +27 -0
  59. data/app/countries/atlas_engine/it/address_importer/corrections/open_address/province_corrector.rb +29 -0
  60. data/app/countries/atlas_engine/it/address_importer/open_address/mapper.rb +42 -0
  61. data/app/countries/atlas_engine/it/country_profile.yml +11 -0
  62. data/app/countries/atlas_engine/jp/address_validation/es/data_mapper.rb +63 -0
  63. data/app/countries/atlas_engine/jp/country_profile.yml +6 -0
  64. data/app/countries/atlas_engine/kr/address_importer/open_address/mapper.rb +41 -0
  65. data/app/countries/atlas_engine/kr/country_profile.yml +11 -0
  66. data/app/countries/atlas_engine/li/address_importer/corrections/open_address/city_corrector.rb +25 -0
  67. data/app/countries/atlas_engine/li/country_profile.yml +21 -0
  68. data/app/countries/atlas_engine/li/index_configuration.yml +63 -0
  69. data/app/countries/atlas_engine/li/synonyms.yml +6 -0
  70. data/app/countries/atlas_engine/lt/country_profile.yml +6 -0
  71. data/app/countries/atlas_engine/lt/synonyms.yml +7 -0
  72. data/app/countries/atlas_engine/lt/validation_transcriber/address_parser.rb +24 -0
  73. data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/locale_corrector.rb +54 -0
  74. data/app/countries/atlas_engine/lu/country_profile.yml +12 -0
  75. data/app/countries/atlas_engine/nl/address_importer/corrections/open_address/city_corrector.rb +25 -0
  76. data/app/countries/atlas_engine/nl/country_profile.yml +18 -0
  77. data/app/countries/atlas_engine/nl/index_configuration.yml +52 -0
  78. data/app/countries/atlas_engine/nl/synonyms.yml +92 -0
  79. data/app/countries/atlas_engine/nl/validation_transcriber/address_parser.rb +85 -0
  80. data/app/countries/atlas_engine/no/country_profile.yml +5 -0
  81. data/app/countries/atlas_engine/nz/country_profile.yml +3 -0
  82. data/app/countries/atlas_engine/pl/country_profile.yml +5 -0
  83. data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +19 -0
  84. data/app/countries/atlas_engine/pt/address_importer/corrections/open_address/city_corrector.rb +32 -0
  85. data/app/countries/atlas_engine/pt/address_importer/open_address/mapper.rb +39 -0
  86. data/app/countries/atlas_engine/pt/country_profile.yml +10 -0
  87. data/app/countries/atlas_engine/pt/synonyms.yml +7 -0
  88. data/app/countries/atlas_engine/sa/country_profile.yml +10 -0
  89. data/app/countries/atlas_engine/se/country_profile.yml +5 -0
  90. data/app/countries/atlas_engine/tt/address_importer/open_address/mapper.rb +38 -0
  91. data/app/countries/atlas_engine/tt/country_profile.yml +7 -0
  92. data/app/countries/atlas_engine/us/country_profile.yml +12 -0
  93. data/app/countries/atlas_engine/us/synonyms.yml +350 -0
  94. data/app/graphql/atlas_engine/errors/locale_unsupported_error.rb +17 -0
  95. data/app/graphql/atlas_engine/schema.graphql +1293 -0
  96. data/app/graphql/atlas_engine/schema.rb +23 -0
  97. data/app/graphql/atlas_engine/types/address_validation/address_input.rb +51 -0
  98. data/app/graphql/atlas_engine/types/address_validation/concern_type.rb +20 -0
  99. data/app/graphql/atlas_engine/types/address_validation/enums/concern_enum.rb +15 -0
  100. data/app/graphql/atlas_engine/types/address_validation/field_type.rb +15 -0
  101. data/app/graphql/atlas_engine/types/address_validation/suggestion_type.rb +21 -0
  102. data/app/graphql/atlas_engine/types/base_argument.rb +9 -0
  103. data/app/graphql/atlas_engine/types/base_enum.rb +9 -0
  104. data/app/graphql/atlas_engine/types/base_field.rb +10 -0
  105. data/app/graphql/atlas_engine/types/base_input_object.rb +9 -0
  106. data/app/graphql/atlas_engine/types/base_interface.rb +10 -0
  107. data/app/graphql/atlas_engine/types/base_object.rb +9 -0
  108. data/app/graphql/atlas_engine/types/base_scalar.rb +9 -0
  109. data/app/graphql/atlas_engine/types/base_union.rb +9 -0
  110. data/app/graphql/atlas_engine/types/matching_strategy_type.rb +12 -0
  111. data/app/graphql/atlas_engine/types/mutation_type.rb +9 -0
  112. data/app/graphql/atlas_engine/types/query_type.rb +61 -0
  113. data/app/graphql/atlas_engine/types/validation_supported_country.rb +12 -0
  114. data/app/graphql/atlas_engine/types/validation_type.rb +22 -0
  115. data/app/helpers/atlas_engine/address_importer/import_log_helper.rb +66 -0
  116. data/app/helpers/atlas_engine/application_helper.rb +7 -0
  117. data/app/helpers/atlas_engine/locale_format_helper.rb +40 -0
  118. data/app/helpers/atlas_engine/log_base.rb +32 -0
  119. data/app/helpers/atlas_engine/log_helper.rb +24 -0
  120. data/app/helpers/atlas_engine/metrics_helper.rb +25 -0
  121. data/app/jobs/atlas_engine/address_importer/clear_records_job.rb +39 -0
  122. data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_job.rb +212 -0
  123. data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_launcher_job.rb +67 -0
  124. data/app/jobs/atlas_engine/address_importer/open_address/prepares_geo_json_file.rb +41 -0
  125. data/app/jobs/atlas_engine/address_importer/resumable_import_job.rb +49 -0
  126. data/app/jobs/atlas_engine/address_importer/street_backfill_job.rb +63 -0
  127. data/app/jobs/atlas_engine/application_job.rb +10 -0
  128. data/app/jobs/atlas_engine/concerns/address_importer/handles_errors.rb +43 -0
  129. data/app/lib/atlas_engine/concern_formatter.rb +40 -0
  130. data/app/lib/atlas_engine/restrictions/base.rb +20 -0
  131. data/app/lib/atlas_engine/restrictions/unsupported_script.rb +31 -0
  132. data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +201 -0
  133. data/app/lib/atlas_engine/validation_transcriber/address_parser_factory.rb +27 -0
  134. data/app/lib/atlas_engine/validation_transcriber/address_parser_north_america.rb +39 -0
  135. data/app/lib/atlas_engine/validation_transcriber/address_parser_oceanic.rb +17 -0
  136. data/app/lib/atlas_engine/validation_transcriber/address_parser_preprocessor.rb +132 -0
  137. data/app/lib/atlas_engine/validation_transcriber/address_parsing_helper.rb +38 -0
  138. data/app/lib/atlas_engine/validation_transcriber/address_parsings.rb +54 -0
  139. data/app/lib/atlas_engine/validation_transcriber/constants.rb +50 -0
  140. data/app/lib/atlas_engine/validation_transcriber/english_street_parser.rb +59 -0
  141. data/app/lib/atlas_engine/validation_transcriber/formatter.rb +46 -0
  142. data/app/lib/atlas_engine/validation_transcriber/french_street_parser.rb +50 -0
  143. data/app/lib/atlas_engine/validation_transcriber/province_code_normalizer.rb +45 -0
  144. data/app/lib/atlas_engine/validation_transcriber/street_parser.rb +18 -0
  145. data/app/lib/atlas_engine/validation_transcriber/zip_normalizer.rb +23 -0
  146. data/app/mailers/atlas_engine/application_mailer.rb +9 -0
  147. data/app/models/atlas_engine/address_importer/corrections/corrector.rb +33 -0
  148. data/app/models/atlas_engine/address_importer/import_events_notifier/base.rb +35 -0
  149. data/app/models/atlas_engine/address_importer/import_events_notifier/notifier.rb +26 -0
  150. data/app/models/atlas_engine/address_importer/open_address/default_mapper.rb +46 -0
  151. data/app/models/atlas_engine/address_importer/open_address/feature_helper.rb +110 -0
  152. data/app/models/atlas_engine/address_importer/open_address/filter.rb +17 -0
  153. data/app/models/atlas_engine/address_importer/open_address/loader.rb +27 -0
  154. data/app/models/atlas_engine/address_importer/open_address/transformer.rb +39 -0
  155. data/app/models/atlas_engine/address_importer/open_address.rb +10 -0
  156. data/app/models/atlas_engine/address_importer/validation/base_validator.rb +86 -0
  157. data/app/models/atlas_engine/address_importer/validation/default_validator.rb +27 -0
  158. data/app/models/atlas_engine/address_importer/validation/field_validations/city.rb +47 -0
  159. data/app/models/atlas_engine/address_importer/validation/field_validations/interface.rb +29 -0
  160. data/app/models/atlas_engine/address_importer/validation/field_validations/province.rb +73 -0
  161. data/app/models/atlas_engine/address_importer/validation/field_validations/zip.rb +84 -0
  162. data/app/models/atlas_engine/address_importer/validation/validator.rb +17 -0
  163. data/app/models/atlas_engine/address_importer/validation/wrapper.rb +70 -0
  164. data/app/models/atlas_engine/address_number.rb +36 -0
  165. data/app/models/atlas_engine/address_number_range.rb +200 -0
  166. data/app/models/atlas_engine/address_validation/abstract_address.rb +49 -0
  167. data/app/models/atlas_engine/address_validation/address.rb +47 -0
  168. data/app/models/atlas_engine/address_validation/candidate.rb +109 -0
  169. data/app/models/atlas_engine/address_validation/candidate_tuple.rb +15 -0
  170. data/app/models/atlas_engine/address_validation/concern.rb +74 -0
  171. data/app/models/atlas_engine/address_validation/concern_producer.rb +19 -0
  172. data/app/models/atlas_engine/address_validation/concern_queue.rb +20 -0
  173. data/app/models/atlas_engine/address_validation/concern_record.rb +122 -0
  174. data/app/models/atlas_engine/address_validation/datastore_base.rb +27 -0
  175. data/app/models/atlas_engine/address_validation/errors.rb +13 -0
  176. data/app/models/atlas_engine/address_validation/es/candidate_selector.rb +70 -0
  177. data/app/models/atlas_engine/address_validation/es/data_mappers/decompounding_data_mapper.rb +39 -0
  178. data/app/models/atlas_engine/address_validation/es/data_mappers/default_data_mapper.rb +110 -0
  179. data/app/models/atlas_engine/address_validation/es/datastore.rb +229 -0
  180. data/app/models/atlas_engine/address_validation/es/default_query_builder.rb +30 -0
  181. data/app/models/atlas_engine/address_validation/es/query_builder.rb +160 -0
  182. data/app/models/atlas_engine/address_validation/es/term_vectors.rb +78 -0
  183. data/app/models/atlas_engine/address_validation/es/validators/full_address.rb +123 -0
  184. data/app/models/atlas_engine/address_validation/es/validators/full_address_street.rb +18 -0
  185. data/app/models/atlas_engine/address_validation/es/validators/restriction_evaluator.rb +37 -0
  186. data/app/models/atlas_engine/address_validation/field.rb +30 -0
  187. data/app/models/atlas_engine/address_validation/full_address_validator_base.rb +27 -0
  188. data/app/models/atlas_engine/address_validation/log_emitter.rb +66 -0
  189. data/app/models/atlas_engine/address_validation/matching_strategies.rb +16 -0
  190. data/app/models/atlas_engine/address_validation/normalizer.rb +38 -0
  191. data/app/models/atlas_engine/address_validation/predicate_pipeline.rb +80 -0
  192. data/app/models/atlas_engine/address_validation/request.rb +12 -0
  193. data/app/models/atlas_engine/address_validation/result.rb +154 -0
  194. data/app/models/atlas_engine/address_validation/runs_validation.rb +16 -0
  195. data/app/models/atlas_engine/address_validation/session.rb +47 -0
  196. data/app/models/atlas_engine/address_validation/statsd_emitter.rb +72 -0
  197. data/app/models/atlas_engine/address_validation/strategies.rb +10 -0
  198. data/app/models/atlas_engine/address_validation/suggestion.rb +97 -0
  199. data/app/models/atlas_engine/address_validation/token/comparator.rb +44 -0
  200. data/app/models/atlas_engine/address_validation/token/comparison.rb +76 -0
  201. data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +158 -0
  202. data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +166 -0
  203. data/app/models/atlas_engine/address_validation/token/sequence.rb +147 -0
  204. data/app/models/atlas_engine/address_validation/token/synonyms.rb +77 -0
  205. data/app/models/atlas_engine/address_validation/token.rb +113 -0
  206. data/app/models/atlas_engine/address_validation/validator.rb +147 -0
  207. data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +97 -0
  208. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +164 -0
  209. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result_base.rb +46 -0
  210. data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +135 -0
  211. data/app/models/atlas_engine/address_validation/validators/full_address/components_to_validate.rb +88 -0
  212. data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +127 -0
  213. data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +23 -0
  214. data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_concern_builder.rb +42 -0
  215. data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_country_concern.rb +37 -0
  216. data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_province_concern.rb +37 -0
  217. data/app/models/atlas_engine/address_validation/validators/full_address/no_candidate_result.rb +26 -0
  218. data/app/models/atlas_engine/address_validation/validators/full_address/number_comparison.rb +31 -0
  219. data/app/models/atlas_engine/address_validation/validators/full_address/postal_code_matcher.rb +60 -0
  220. data/app/models/atlas_engine/address_validation/validators/full_address/result_updater.rb +42 -0
  221. data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +140 -0
  222. data/app/models/atlas_engine/address_validation/validators/full_address/unknown_address_concern.rb +30 -0
  223. data/app/models/atlas_engine/address_validation/validators/full_address/unknown_province_concern.rb +38 -0
  224. data/app/models/atlas_engine/address_validation/validators/full_address/unknown_zip_for_address_concern.rb +32 -0
  225. data/app/models/atlas_engine/address_validation/validators/full_address/unmatched_field_concern.rb +84 -0
  226. data/app/models/atlas_engine/address_validation/validators/full_address/unsupported_script_result.rb +22 -0
  227. data/app/models/atlas_engine/address_validation/validators/predicates/cache.rb +38 -0
  228. data/app/models/atlas_engine/address_validation/validators/predicates/city/present.rb +36 -0
  229. data/app/models/atlas_engine/address_validation/validators/predicates/country/exists.rb +34 -0
  230. data/app/models/atlas_engine/address_validation/validators/predicates/country/valid_for_zip.rb +60 -0
  231. data/app/models/atlas_engine/address_validation/validators/predicates/no_emojis.rb +38 -0
  232. data/app/models/atlas_engine/address_validation/validators/predicates/no_html_tags.rb +39 -0
  233. data/app/models/atlas_engine/address_validation/validators/predicates/no_url.rb +38 -0
  234. data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_length.rb +34 -0
  235. data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_token_count.rb +63 -0
  236. data/app/models/atlas_engine/address_validation/validators/predicates/phone/valid.rb +41 -0
  237. data/app/models/atlas_engine/address_validation/validators/predicates/predicate.rb +37 -0
  238. data/app/models/atlas_engine/address_validation/validators/predicates/province/exists.rb +43 -0
  239. data/app/models/atlas_engine/address_validation/validators/predicates/province/valid_for_country.rb +48 -0
  240. data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1.rb +45 -0
  241. data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +43 -0
  242. data/app/models/atlas_engine/address_validation/validators/predicates/street/present.rb +35 -0
  243. data/app/models/atlas_engine/address_validation/validators/predicates/zip/present.rb +58 -0
  244. data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_country.rb +45 -0
  245. data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_province.rb +55 -0
  246. data/app/models/atlas_engine/address_validation/validators/predicates/zip/zip_base.rb +25 -0
  247. data/app/models/atlas_engine/address_validation/zip_truncator.rb +32 -0
  248. data/app/models/atlas_engine/application_record.rb +8 -0
  249. data/app/models/atlas_engine/coded_error.rb +18 -0
  250. data/app/models/atlas_engine/coded_errors.rb +17 -0
  251. data/app/models/atlas_engine/country_import.rb +44 -0
  252. data/app/models/atlas_engine/country_profile.rb +270 -0
  253. data/app/models/atlas_engine/country_profile_ingestion_subset.rb +42 -0
  254. data/app/models/atlas_engine/country_profile_subset_base.rb +22 -0
  255. data/app/models/atlas_engine/country_profile_validation_subset.rb +48 -0
  256. data/app/models/atlas_engine/country_repository.rb +110 -0
  257. data/app/models/atlas_engine/elasticsearch/client.rb +116 -0
  258. data/app/models/atlas_engine/elasticsearch/client_interface.rb +89 -0
  259. data/app/models/atlas_engine/elasticsearch/repository.rb +246 -0
  260. data/app/models/atlas_engine/elasticsearch/repository_interface.rb +82 -0
  261. data/app/models/atlas_engine/elasticsearch/response.rb +20 -0
  262. data/app/models/atlas_engine/event.rb +12 -0
  263. data/app/models/atlas_engine/field_decompounder.rb +36 -0
  264. data/app/models/atlas_engine/index_configuration_factory.rb +188 -0
  265. data/app/models/atlas_engine/post_address.rb +114 -0
  266. data/app/models/atlas_engine/post_address_importer.rb +34 -0
  267. data/app/models/atlas_engine/services/service_helper.rb +21 -0
  268. data/app/models/atlas_engine/services/validation.rb +65 -0
  269. data/app/models/atlas_engine/services/validation_eligibility.rb +18 -0
  270. data/app/models/atlas_engine/street.rb +34 -0
  271. data/app/tasks/maintenance/atlas_engine/elasticsearch_index_create_task.rb +106 -0
  272. data/app/tasks/maintenance/atlas_engine/geo_json_import_task.rb +29 -0
  273. data/app/views/atlas_engine/connectivity/index.html.erb +50 -0
  274. data/app/views/atlas_engine/country_imports/index.html.erb +49 -0
  275. data/app/views/atlas_engine/country_imports/show.html.erb +73 -0
  276. data/app/views/layouts/atlas_engine/application.html.erb +15 -0
  277. data/config/initializers/1.ruby_patches.rb +18 -0
  278. data/config/initializers/sorbet.rb +5 -0
  279. data/config/initializers/worldwide.rb +5 -0
  280. data/config/locales/internal/en.yml +14 -0
  281. data/config/routes.rb +17 -0
  282. data/db/data/address_synonyms/index_configurations/default.yml +141 -0
  283. data/db/data/country_profiles/default.yml +23 -0
  284. data/db/data/transcriber.yml +760 -0
  285. data/db/data/validation_pipelines/es.yml +58 -0
  286. data/db/data/validation_pipelines/es_street.yml +58 -0
  287. data/db/data/validation_pipelines/local.yml +60 -0
  288. data/db/migrate/20230919173037_create_atlas_engine_post_addresses.rb +25 -0
  289. data/db/migrate/20231117142735_add_building_and_unit_ranges_column.rb +7 -0
  290. data/db/migrate/20231117143536_create_atlas_engine_country_imports.rb +11 -0
  291. data/db/migrate/20231117145844_create_atlas_engine_events_table.rb +13 -0
  292. data/db/migrate/20231123153554_add_unique_index_to_atlas_engine_post_addresses.rb +14 -0
  293. data/db/migrate/20231123154658_add_index_to_post_addresses_on_source_id_locale_country_code.rb +12 -0
  294. data/lib/atlas_engine/engine.rb +10 -0
  295. data/lib/atlas_engine/version.rb +6 -0
  296. data/lib/atlas_engine.rb +66 -0
  297. data/lib/tasks/atlas_engine/address_importer.rake +20 -0
  298. metadata +553 -0
@@ -0,0 +1,212 @@
# typed: true
# frozen_string_literal: true

# The GeoJsonImportJob loads a GeoJSON file into the PostAddress table.
# It is designed to be resumable, according to JobIteration best practices.
# The input file is read in chunks of CHUNK_SIZE (10_000) rows.
# NOTE(review): the file was originally described as "optionally gzip-compressed", but #io
# always wraps it in Zlib::GzipReader; presumably PreparesGeoJsonFile#download_geojson
# guarantees a gzip file on disk -- confirm.
#
# The ETL process is as follows:
# - Extract: each row is evaluated with JSON.parse. Next, the GeoJSON feature contained
#   is passed to a Filter instance, which may return false to reject the row. The Filter class
#   is specified in country_profile under open_address/filter.
# - Transform: a Transformer instance receives the feature and returns a hash that matches the
#   PostAddress table/schema, or nil to reject the feature.
#   NOTE(review): the original comment said the Transformer class is configured in country_profile
#   "under open_address/filter", which looks like a copy-paste of the Filter entry; this job
#   instantiates Transformer directly -- confirm which profile key (if any) applies.
# - Transform: after the Transformer, addresses are passed to a Corrections::Corrector, which runs one or
#   more dedicated Correctors. These modify the passed address inline, and may clear the address hash to reject it.
#   The Corrector classes are specified in country_profile under ingestion/correctors/open_address.
#   NOTE(review): this job never calls #corrector itself; presumably the Transformer applies it -- confirm.
# - Load: addresses are upserted into the PostAddresses table. When two addresses have the same
#   province + locale + city + street + zip (a conflict on index_post_addresses_on_pc_zp_st_ct_lc),
#   the building_and_unit_ranges field is merged using JSON_MERGE and other fields (like lat, lon) are overwritten.
module AtlasEngine
  module AddressImporter
    module OpenAddress
      class GeoJsonImportJob < AddressImporter::ResumableImportJob
        extend T::Sig
        include HandlesInterruption
        include PreparesGeoJsonFile
        attr_reader :geojson_path, :country_import, :country_code, :loader, :transformer

        # Number of input lines grouped into one batch (one iteration of the job).
        CHUNK_SIZE = 10_000
        # Progress is logged once every REPORT_STEP chunks.
        REPORT_STEP = 5

        around_perform :setup_and_download

        # Setup boilerplate: JobIteration doesn't let us override #perform. Instead
        # the around_perform callback is used for that.
        #
        # Reads the job arguments, builds the loader/transformer collaborators, logs the
        # download, then hands the block (the actual perform) to download_geojson.
        def setup_and_download(&block)
          @loader = Loader.new
          @country_code = argument(:country_code)
          @geojson_path = Pathname.new(argument(:geojson_file_path))
          @locale = argument(:locale)&.downcase # locale is optional; nil stays nil
          @country_import = CountryImport.find(argument(:country_import_id))
          @transformer = Transformer.new(country_import: country_import, locale: @locale)

          import_log_info(
            country_import: country_import,
            message: "Downloading geojson file",
            additional_params: { file_path: geojson_path.to_s },
          )

          download_geojson(&block)
        end

        StringProps = T.type_alias { T::Hash[String, T.untyped] }
        BatchOfRows = T.type_alias { T::Array[StringProps] }

        # Part of JobIteration: returns an Enumerator that yields batches of addresses, and the cursor position.
        # If cursor is present, the enumerator starts at that position. This stage does extraction (parsing JSON)
        # and filtering.
        #
        # The cursor is the chunk number: each yielded element is [filtered_features, chunk_num].
        sig do
          params(
            params: T::Hash[Symbol, T.untyped],
            cursor: T.untyped,
          ).returns(T::Enumerator[[BatchOfRows, Integer]])
        end
        def build_enumerator(params, cursor:)
          start_at = if cursor.nil?
            import_log_info(country_import: country_import, message: "Importing whole file")
            0
          else
            import_log_info(country_import: country_import, message: "Starting import at chunk #{cursor}")
            cursor.to_i
          end

          io.each
            # NOTE: The bigger the chunk size, the fewer roundtrips to MySQL, and therefore faster.
            .each_slice(CHUNK_SIZE)
            .lazy
            .drop(start_at) # Cursor is chunk number. When resuming, skip that many chunks.
            .with_index(start_at) # Include skipped chunks in numbering
            .map do |lines, chunk_num|
              track_progress(chunk_num)
              [lines.map { |line| JSON.parse(line) }, chunk_num]
            end
            .map do |features, chunk_num|
              [features.select(&row_filter), chunk_num]
            end
        end

        # Part of JobIteration: ran for each batch of rows. This stage does transformation
        # (converting a GeoJSON feature into a hash, then applying correctors) and loading (upserting into PostAddress).
        #
        # element_id (the cursor/chunk number) is not used here. Batches in which every feature
        # is rejected by the transformer are skipped without touching the loader.
        sig { params(batch: BatchOfRows, element_id: T.untyped).void }
        def each_iteration(batch, element_id)
          exit_if_interrupted!(country_import)

          addresses = attributes_from_batch(batch)
          return if addresses.blank?

          condensed = condense_addresses(addresses)

          loader.load(condensed)
        end

        # Collapses addresses that share province_code + locale + city + street + zip into a single
        # hash per group. Within a group, :building_and_unit_ranges hashes are merged together;
        # for every other key the value of the later address wins.
        sig do
          params(addresses: T::Array[T::Hash[Symbol,
            T.untyped]]).returns(T::Array[T.nilable(T::Hash[Symbol, T.untyped])])
        end
        def condense_addresses(addresses)
          addresses
            .group_by { |attrs| [attrs[:province_code], attrs[:locale], attrs[:city], attrs[:street], attrs[:zip]] }
            .map do |(_province_code, _locale, _city, _street, _zip), matched_addresses|
              matched_addresses.reduce do |acc, matched_address|
                acc.merge(matched_address) do |key, oldval, newval|
                  if key == :building_and_unit_ranges
                    oldval.merge(newval)
                  else
                    newval
                  end
                end
              end
            end
        end

        # Runs every feature of the batch through the transformer. Features for which the
        # transformer returns nil are counted as invalid lines and dropped from the result.
        sig { params(batch: BatchOfRows).returns(T::Array[T::Hash[Symbol, T.untyped]]) }
        def attributes_from_batch(batch)
          batch
            .filter_map do |feature|
              attrs = transformer.transform(feature)
              if attrs.nil?
                incr_invalid_lines
                next
              end

              attrs
            end
        end

        # Logs import progress once every REPORT_STEP chunks. The "lines parsed" figure is derived
        # from the chunk number, so it counts the lines that precede the given chunk.
        # The discarded-lines message is only emitted once at least one line has been discarded
        # (the original guard compared it to lines_parsed, which logged "Lines discarded: 0" on
        # almost every report).
        sig { params(chunk_num: Integer).void }
        def track_progress(chunk_num)
          return unless (chunk_num % REPORT_STEP).zero?

          lines_parsed = chunk_num * CHUNK_SIZE
          import_log_info(
            country_import: country_import,
            message: "Processing chunk #{chunk_num}, lines parsed so far: #{lines_parsed}",
          )

          return unless invalid_lines.positive?

          import_log_info(
            country_import: country_import,
            message: "Lines discarded: #{invalid_lines}",
            category: :invalid_address,
          )
        end

        # Memoized CountryProfile for the country being imported.
        sig { returns(CountryProfile) }
        def country_profile
          @country_profile ||= CountryProfile.for(country_code)
        end

        FilterType = T.type_alias { T.proc.params(arg0: StringProps).returns(T::Boolean) }
        # Returns a callable that takes a row and returns true if it should be imported.
        # The filter comes from country_profile.open_address[:filter]: nil means "accept everything",
        # a namespaced class name is constantized and its #filter method is used.
        # Any other value matches no pattern and raises NoMatchingPatternError.
        sig { returns(FilterType) }
        def row_filter
          @row_filter ||= case country_profile.open_address[:filter]
          in nil # Undefined: let everything through
            ->(_row) { true }
          in /\w+(::\w+)+/ => sym # Class name
            cls = sym.constantize
            inst = cls.new(country_import: country_import)
            inst.method(:filter).to_proc
          end
        end

        Corrector = AddressImporter::Corrections::Corrector
        # Returns a Corrector instance, or nil if no correctors are defined for this country.
        # The result is memoized with a defined? guard so that the nil case is cached as well
        # (`||=` would redo the profile lookup on every call when there are no correctors).
        sig { returns(T.nilable(Corrector)) }
        def corrector
          return @corrector if defined?(@corrector)

          @corrector = if country_profile.ingestion.correctors(source: "open_address").empty?
            nil
          else
            Corrector.new(
              country_code: country_code,
              source: "open_address",
            )
          end
        end

        # Returns an IO-like object that reads the geojson file.
        # Returns untyped because GzipReader is IO-like, but not a subclass of IO.
        # NOTE(review): every call opens a new file handle and nothing in this class closes it;
        # consider closing the reader once the enumerator is exhausted.
        sig { returns(T.untyped) }
        def io
          Zlib::GzipReader.new(geojson_path.open("rb"))
        end

        # Adds one to the count of features rejected by the transformer.
        # The first rejection yields a count of 1 (previously it only initialised the counter to 0,
        # so the total was always one short).
        sig { void }
        def incr_invalid_lines
          @invalid_lines = invalid_lines + 1
        end

        # Number of features rejected by the transformer so far in this job run (0 if none).
        sig { returns(Integer) }
        def invalid_lines
          @invalid_lines || 0
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressImporter
    module OpenAddress
      # Entry point of an OpenAddress import: creates the CountryImport record
      # and enqueues the chain of jobs that performs the import.
      class GeoJsonImportLauncherJob < ApplicationJob
        extend T::Sig
        include ImportLogHelper

        # Starts a new import for +country_code+.
        #
        # geojson_file_path may hold several comma-separated paths; one
        # GeoJsonImportJob is chained per path, followed by a StreetBackfillJob.
        # When clear_records is true the chain is started by ClearRecordsJob,
        # otherwise by the first GeoJsonImportJob.
        #
        # A StandardError raised once the import record exists is logged and the
        # import is interrupted; the error is not re-raised. If the record could
        # not be created, the original error propagates.
        sig { params(country_code: String, geojson_file_path: String, clear_records: T::Boolean, locale: String).void }
        def perform(country_code:, geojson_file_path:, clear_records:, locale:)
          import = CountryImport.create!(country_code: country_code)
          import.start!

          import_log_info(
            country_import: import,
            message: "Starting import",
            additional_params: { file: geojson_file_path },
            notify: true,
          )

          # One job description per file; `path` avoids shadowing the method parameter.
          geojson_import_jobs = geojson_file_path.split(",").map do |path|
            {
              job_name: GeoJsonImportJob,
              job_args: {
                country_code: country_code,
                country_import_id: import.id,
                geojson_file_path: path,
                locale: locale,
              },
            }
          end

          street_backfill_job = {
            job_name: AddressImporter::StreetBackfillJob,
            job_args: { country_code: country_code, country_import_id: import.id },
          }

          if clear_records
            import_log_info(country_import: import, message: "Clearing records before import...")

            AddressImporter::ClearRecordsJob.perform_later(
              country_import_id: import.id,
              country_code: country_code.upcase,
              followed_by: geojson_import_jobs + [street_backfill_job],
            )
          else
            import_log_info(country_import: import, message: "Importing without clearing records...")

            GeoJsonImportJob.perform_later(
              **T.must(geojson_import_jobs.first)[:job_args],
              followed_by: geojson_import_jobs.drop(1) + [street_backfill_job],
            )
          end
        rescue StandardError => e
          # `import` is still nil when CountryImport.create! itself failed. There is
          # then no record to log against or interrupt, so surface the original
          # error instead of a TypeError from T.must(nil).
          raise if import.nil?

          import_log_error(
            country_import: import,
            message: "Import failed with #{e.class}",
            additional_params: { error: e },
          )
          import.interrupt
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module AddressImporter
6
+ module OpenAddress
7
+ module PreparesGeoJsonFile
8
+ extend T::Sig
9
+ include HandlesBlob
10
+
11
+ ROOT_FOLDER = "openaddress"
12
+
13
+ sig { params(block: T.proc.void).void }
14
+ def download_geojson(&block)
15
+ if @geojson_path.exist?
16
+ yield
17
+ else
18
+ download_from_activestorage do |local_path|
19
+ @geojson_path = local_path
20
+ yield
21
+ end
22
+ end
23
+ end
24
+
25
+ private
26
+
27
+ sig do
28
+ params(block: T.proc.params(arg0: Pathname).void).void
29
+ end
30
+ def download_from_activestorage(&block)
31
+ root = Pathname.new(ROOT_FOLDER)
32
+ key = root.join(@geojson_path.basename).to_s
33
+
34
+ download(key) do |fp|
35
+ yield Pathname.new(fp.path)
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,49 @@
1
+ # typed: false
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module AddressImporter
6
+ class ResumableImportJob < ApplicationJob
7
+ include JobIteration::Iteration
8
+ include ImportLogHelper
9
+ include Concerns::AddressImporter::HandlesErrors
10
+
11
+ on_complete do |job|
12
+ next_job = job.argument(:followed_by)&.shift
13
+ if next_job.present?
14
+ job_args_with_followed_by = next_job[:job_args].merge({ followed_by: job.argument(:followed_by) })
15
+ next_job[:job_name].perform_later(**job_args_with_followed_by)
16
+ elsif country_import.present?
17
+ country_import.complete!
18
+ log_final_stats
19
+ end
20
+ end
21
+
22
+ sig { void }
23
+ def log_final_stats
24
+ message = if country_import.detected_invalid_addresses?
25
+ "Invalid addresses detected"
26
+ else
27
+ "No invalid addresses detected"
28
+ end
29
+
30
+ import_log_info(
31
+ country_import: country_import,
32
+ message: message,
33
+ notify: true,
34
+ )
35
+
36
+ import_log_info(
37
+ country_import: country_import,
38
+ message: "Import complete!",
39
+ notify: true,
40
+ )
41
+ end
42
+
43
+ def country_import
44
+ country_import_id = argument(:country_import_id)
45
+ CountryImport.find(country_import_id) if country_import_id.present?
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,63 @@
1
+ # typed: false
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module AddressImporter
6
+ class StreetBackfillJob < AddressImporter::ResumableImportJob
7
+ include ImportLogHelper
8
+ include LogHelper
9
+
10
+ def build_enumerator(params, cursor:)
11
+ enumerator_builder.build_times_enumerator(1, cursor: cursor)
12
+ end
13
+
14
+ def each_iteration(batch, params)
15
+ @country_code = params[:country_code]
16
+ @locales = CountryProfile.for(@country_code).validation.index_locales
17
+ return if @locales.nil? || @locales.size < 2
18
+
19
+ import_log_info(
20
+ country_import: country_import,
21
+ message: "Backfilling street data for locales #{@locales}...",
22
+ )
23
+
24
+ ActiveRecord::Base.connection.execute(backfill_streets_sql)
25
+ end
26
+
27
+ private
28
+
29
+ def backfill_streets_sql
30
+ <<-SQL.squish
31
+ UPDATE #{PostAddress.table_name} AS target
32
+ INNER JOIN (
33
+ SELECT
34
+ #{@locales.first}.source_id AS source_id,
35
+ COALESCE(#{@locales.map { |loc| "#{loc}.street" }.join(", ")}) AS final_street,
36
+ #{@locales.map { |loc| "CASE WHEN #{loc}.source_id IS NOT NULL THEN 1 ELSE 0 END AS #{loc}_present" }.join(",")}
37
+ FROM (#{records_by_locale[@locales.first]}) AS #{@locales.first}
38
+ #{join_tables_on_statement}
39
+ HAVING (#{locale_presence_count}) > 1
40
+ ) AS effective ON target.source_id = effective.source_id AND target.country_code = '#{@country_code}'
41
+ SET target.street = effective.final_street
42
+ WHERE target.country_code = '#{@country_code}' AND (target.street = '' OR target.street IS NULL)
43
+ SQL
44
+ end
45
+
46
+ def records_by_locale
47
+ @records_by_locale ||= @locales.index_with do |locale|
48
+ PostAddress.where(country_code: @country_code, locale: locale).to_sql
49
+ end
50
+ end
51
+
52
+ def join_tables_on_statement
53
+ T.must(@locales[1..-1]).map do |loc|
54
+ "LEFT JOIN (#{records_by_locale[loc]}) AS #{loc} ON #{@locales.first}.source_id = #{loc}.source_id"
55
+ end.join("\n")
56
+ end
57
+
58
+ def locale_presence_count
59
+ @locales.map { |loc| "#{loc}_present" }.join(" + ")
60
+ end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,10 @@
1
+ # typed: false
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ class ApplicationJob < ActiveJob::Base
6
+ def argument(key)
7
+ arguments.first&.fetch(key, nil)
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,43 @@
1
+ # typed: false
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Concerns
6
+ module AddressImporter
7
+ module HandlesErrors
8
+ extend ActiveSupport::Concern
9
+ include ::AtlasEngine::AddressImporter::ImportLogHelper
10
+
11
+ included do
12
+ discard_on(StandardError) do |job, exception|
13
+ country_import_id = job.arguments.first[:country_import_id]
14
+ country_import = CountryImport.find(country_import_id)
15
+
16
+ job.import_log_error(
17
+ country_import: country_import,
18
+ message: ":errors: Import failed with exception: #{exception.message}",
19
+ additional_params: { stack_trace: exception.backtrace.inspect },
20
+ )
21
+
22
+ country_import.interrupt! if country_import.present?
23
+ end
24
+
25
+ retry_on(
26
+ Mysql2::Error::ConnectionError,
27
+ wait: 10.seconds,
28
+ attempts: 5,
29
+ ) do |job, exception|
30
+ country_import_id = job.arguments.first[:country_import_id]
31
+ country_import = CountryImport.find(country_import_id)
32
+
33
+ job.import_log_error(country_import: country_import, message:
34
+ "Job failed after 5 retries with error: #{exception.message}")
35
+
36
+ country_import.interrupt! if country_import.present?
37
+ raise exception
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,40 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module ConcernFormatter
6
+ include Kernel # https://github.com/sorbet/sorbet/issues/1109
7
+ extend T::Sig
8
+
9
+ sig { returns(AddressValidation::AbstractAddress) }
10
+ def address
11
+ raise NotImplementedError
12
+ end
13
+
14
+ sig { returns(String) }
15
+ def country_name
16
+ return "" if address.country_code.blank?
17
+
18
+ country.country? && country.full_name ? country.full_name : address.country_code
19
+ end
20
+
21
+ sig { returns(String) }
22
+ def province_name
23
+ return "" if address.country_code.blank? || address.province_code.blank?
24
+
25
+ province.province? && province.full_name ? province.full_name : address.province_code
26
+ end
27
+
28
+ private
29
+
30
+ sig { returns(Worldwide::Region) }
31
+ def country
32
+ @country ||= Worldwide.region(code: address.country_code)
33
+ end
34
+
35
+ sig { returns(Worldwide::Region) }
36
+ def province
37
+ @province ||= country.zone(code: address.province_code)
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,20 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Restrictions
6
+ module Base
7
+ extend T::Sig
8
+ extend T::Helpers
9
+ interface!
10
+
11
+ sig do
12
+ abstract.params(
13
+ address: AtlasEngine::AddressValidation::AbstractAddress,
14
+ params: T.untyped,
15
+ ).returns(T::Boolean)
16
+ end
17
+ def apply?(address:, params: nil); end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,31 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Restrictions
6
+ class UnsupportedScript
7
+ class << self
8
+ extend T::Sig
9
+ include Base
10
+
11
+ sig do
12
+ override.params(
13
+ address: AtlasEngine::AddressValidation::AbstractAddress,
14
+ params: T.untyped,
15
+ ).returns(T::Boolean)
16
+ end
17
+ def apply?(address:, params: {})
18
+ supported_script = params[:supported_script]
19
+ return false if supported_script.nil?
20
+
21
+ scripts = Worldwide.scripts.identify(
22
+ text: address.address1.to_s + " " + address.address2.to_s + " " + address.city.to_s,
23
+ )
24
+ return false if scripts.empty?
25
+
26
+ scripts.any? { |script| script != supported_script }
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end