atlas_engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (298):
  1. checksums.yaml +7 -0
  2. data/README.md +123 -0
  3. data/Rakefile +20 -0
  4. data/app/assets/config/atlas_engine_manifest.js +3 -0
  5. data/app/assets/stylesheets/atlas_engine/application.css +15 -0
  6. data/app/concerns/atlas_engine/handles_blob.rb +26 -0
  7. data/app/concerns/atlas_engine/handles_interruption.rb +22 -0
  8. data/app/controllers/atlas_engine/application_controller.rb +7 -0
  9. data/app/controllers/atlas_engine/connectivity_controller.rb +21 -0
  10. data/app/controllers/atlas_engine/country_imports_controller.rb +73 -0
  11. data/app/controllers/atlas_engine/graphql_controller.rb +59 -0
  12. data/app/countries/atlas_engine/ar/country_profile.yml +9 -0
  13. data/app/countries/atlas_engine/at/address_importer/corrections/open_address/city_corrector.rb +23 -0
  14. data/app/countries/atlas_engine/at/country_profile.yml +24 -0
  15. data/app/countries/atlas_engine/at/index_configuration.yml +63 -0
  16. data/app/countries/atlas_engine/at/synonyms.yml +6 -0
  17. data/app/countries/atlas_engine/at/validation_transcriber/address_parser.rb +58 -0
  18. data/app/countries/atlas_engine/au/address_importer/open_address/filter.rb +26 -0
  19. data/app/countries/atlas_engine/au/address_importer/open_address/mapper.rb +41 -0
  20. data/app/countries/atlas_engine/au/country_profile.yml +13 -0
  21. data/app/countries/atlas_engine/au/synonyms.yml +209 -0
  22. data/app/countries/atlas_engine/au/validation_transcriber/address_parser.rb +121 -0
  23. data/app/countries/atlas_engine/be/country_profile.yml +12 -0
  24. data/app/countries/atlas_engine/bm/address_importer/corrections/open_address/city_alias_corrector.rb +38 -0
  25. data/app/countries/atlas_engine/bm/address_importer/open_address/mapper.rb +40 -0
  26. data/app/countries/atlas_engine/bm/country_profile.yml +12 -0
  27. data/app/countries/atlas_engine/br/country_profile.yml +4 -0
  28. data/app/countries/atlas_engine/ca/country_profile.yml +7 -0
  29. data/app/countries/atlas_engine/ca/synonyms.yml +1615 -0
  30. data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/city_corrector.rb +29 -0
  31. data/app/countries/atlas_engine/ch/address_importer/corrections/open_address/locale_corrector.rb +74 -0
  32. data/app/countries/atlas_engine/ch/address_importer/open_address/mapper.rb +40 -0
  33. data/app/countries/atlas_engine/ch/country_profile.yml +15 -0
  34. data/app/countries/atlas_engine/ch/locales/de/country_profile.yml +15 -0
  35. data/app/countries/atlas_engine/ch/locales/de/index_configuration.yml +63 -0
  36. data/app/countries/atlas_engine/ch/locales/de/synonyms.yml +7 -0
  37. data/app/countries/atlas_engine/ch/locales/fr/synonyms.yml +21 -0
  38. data/app/countries/atlas_engine/cz/country_profile.yml +6 -0
  39. data/app/countries/atlas_engine/de/country_profile.yml +19 -0
  40. data/app/countries/atlas_engine/de/index_configuration.yml +64 -0
  41. data/app/countries/atlas_engine/de/synonyms.yml +2 -0
  42. data/app/countries/atlas_engine/de/validation_transcriber/address_parser.rb +19 -0
  43. data/app/countries/atlas_engine/dk/country_profile.yml +6 -0
  44. data/app/countries/atlas_engine/dk/synonyms.yml +3 -0
  45. data/app/countries/atlas_engine/dk/validation_transcriber/address_parser.rb +21 -0
  46. data/app/countries/atlas_engine/fo/country_profile.yml +5 -0
  47. data/app/countries/atlas_engine/fr/address_importer/corrections/open_address/city_corrector.rb +28 -0
  48. data/app/countries/atlas_engine/fr/country_profile.yml +13 -0
  49. data/app/countries/atlas_engine/fr/synonyms.yml +21 -0
  50. data/app/countries/atlas_engine/fr/validation_transcriber/address_parser.rb +34 -0
  51. data/app/countries/atlas_engine/gb/address_validation/es/query_builder.rb +98 -0
  52. data/app/countries/atlas_engine/gb/country_profile.yml +10 -0
  53. data/app/countries/atlas_engine/gb/validation_transcriber/full_address_parser.rb +164 -0
  54. data/app/countries/atlas_engine/gb/validation_transcriber/parsed_address.rb +120 -0
  55. data/app/countries/atlas_engine/gg/address_validation/validators/full_address/restrictions/unsupported_city.rb +39 -0
  56. data/app/countries/atlas_engine/gg/country_profile.yml +7 -0
  57. data/app/countries/atlas_engine/ie/country_profile.yml +3 -0
  58. data/app/countries/atlas_engine/it/address_importer/corrections/open_address/city_corrector.rb +27 -0
  59. data/app/countries/atlas_engine/it/address_importer/corrections/open_address/province_corrector.rb +29 -0
  60. data/app/countries/atlas_engine/it/address_importer/open_address/mapper.rb +42 -0
  61. data/app/countries/atlas_engine/it/country_profile.yml +11 -0
  62. data/app/countries/atlas_engine/jp/address_validation/es/data_mapper.rb +63 -0
  63. data/app/countries/atlas_engine/jp/country_profile.yml +6 -0
  64. data/app/countries/atlas_engine/kr/address_importer/open_address/mapper.rb +41 -0
  65. data/app/countries/atlas_engine/kr/country_profile.yml +11 -0
  66. data/app/countries/atlas_engine/li/address_importer/corrections/open_address/city_corrector.rb +25 -0
  67. data/app/countries/atlas_engine/li/country_profile.yml +21 -0
  68. data/app/countries/atlas_engine/li/index_configuration.yml +63 -0
  69. data/app/countries/atlas_engine/li/synonyms.yml +6 -0
  70. data/app/countries/atlas_engine/lt/country_profile.yml +6 -0
  71. data/app/countries/atlas_engine/lt/synonyms.yml +7 -0
  72. data/app/countries/atlas_engine/lt/validation_transcriber/address_parser.rb +24 -0
  73. data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/locale_corrector.rb +54 -0
  74. data/app/countries/atlas_engine/lu/country_profile.yml +12 -0
  75. data/app/countries/atlas_engine/nl/address_importer/corrections/open_address/city_corrector.rb +25 -0
  76. data/app/countries/atlas_engine/nl/country_profile.yml +18 -0
  77. data/app/countries/atlas_engine/nl/index_configuration.yml +52 -0
  78. data/app/countries/atlas_engine/nl/synonyms.yml +92 -0
  79. data/app/countries/atlas_engine/nl/validation_transcriber/address_parser.rb +85 -0
  80. data/app/countries/atlas_engine/no/country_profile.yml +5 -0
  81. data/app/countries/atlas_engine/nz/country_profile.yml +3 -0
  82. data/app/countries/atlas_engine/pl/country_profile.yml +5 -0
  83. data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +19 -0
  84. data/app/countries/atlas_engine/pt/address_importer/corrections/open_address/city_corrector.rb +32 -0
  85. data/app/countries/atlas_engine/pt/address_importer/open_address/mapper.rb +39 -0
  86. data/app/countries/atlas_engine/pt/country_profile.yml +10 -0
  87. data/app/countries/atlas_engine/pt/synonyms.yml +7 -0
  88. data/app/countries/atlas_engine/sa/country_profile.yml +10 -0
  89. data/app/countries/atlas_engine/se/country_profile.yml +5 -0
  90. data/app/countries/atlas_engine/tt/address_importer/open_address/mapper.rb +38 -0
  91. data/app/countries/atlas_engine/tt/country_profile.yml +7 -0
  92. data/app/countries/atlas_engine/us/country_profile.yml +12 -0
  93. data/app/countries/atlas_engine/us/synonyms.yml +350 -0
  94. data/app/graphql/atlas_engine/errors/locale_unsupported_error.rb +17 -0
  95. data/app/graphql/atlas_engine/schema.graphql +1293 -0
  96. data/app/graphql/atlas_engine/schema.rb +23 -0
  97. data/app/graphql/atlas_engine/types/address_validation/address_input.rb +51 -0
  98. data/app/graphql/atlas_engine/types/address_validation/concern_type.rb +20 -0
  99. data/app/graphql/atlas_engine/types/address_validation/enums/concern_enum.rb +15 -0
  100. data/app/graphql/atlas_engine/types/address_validation/field_type.rb +15 -0
  101. data/app/graphql/atlas_engine/types/address_validation/suggestion_type.rb +21 -0
  102. data/app/graphql/atlas_engine/types/base_argument.rb +9 -0
  103. data/app/graphql/atlas_engine/types/base_enum.rb +9 -0
  104. data/app/graphql/atlas_engine/types/base_field.rb +10 -0
  105. data/app/graphql/atlas_engine/types/base_input_object.rb +9 -0
  106. data/app/graphql/atlas_engine/types/base_interface.rb +10 -0
  107. data/app/graphql/atlas_engine/types/base_object.rb +9 -0
  108. data/app/graphql/atlas_engine/types/base_scalar.rb +9 -0
  109. data/app/graphql/atlas_engine/types/base_union.rb +9 -0
  110. data/app/graphql/atlas_engine/types/matching_strategy_type.rb +12 -0
  111. data/app/graphql/atlas_engine/types/mutation_type.rb +9 -0
  112. data/app/graphql/atlas_engine/types/query_type.rb +61 -0
  113. data/app/graphql/atlas_engine/types/validation_supported_country.rb +12 -0
  114. data/app/graphql/atlas_engine/types/validation_type.rb +22 -0
  115. data/app/helpers/atlas_engine/address_importer/import_log_helper.rb +66 -0
  116. data/app/helpers/atlas_engine/application_helper.rb +7 -0
  117. data/app/helpers/atlas_engine/locale_format_helper.rb +40 -0
  118. data/app/helpers/atlas_engine/log_base.rb +32 -0
  119. data/app/helpers/atlas_engine/log_helper.rb +24 -0
  120. data/app/helpers/atlas_engine/metrics_helper.rb +25 -0
  121. data/app/jobs/atlas_engine/address_importer/clear_records_job.rb +39 -0
  122. data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_job.rb +212 -0
  123. data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_launcher_job.rb +67 -0
  124. data/app/jobs/atlas_engine/address_importer/open_address/prepares_geo_json_file.rb +41 -0
  125. data/app/jobs/atlas_engine/address_importer/resumable_import_job.rb +49 -0
  126. data/app/jobs/atlas_engine/address_importer/street_backfill_job.rb +63 -0
  127. data/app/jobs/atlas_engine/application_job.rb +10 -0
  128. data/app/jobs/atlas_engine/concerns/address_importer/handles_errors.rb +43 -0
  129. data/app/lib/atlas_engine/concern_formatter.rb +40 -0
  130. data/app/lib/atlas_engine/restrictions/base.rb +20 -0
  131. data/app/lib/atlas_engine/restrictions/unsupported_script.rb +31 -0
  132. data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +201 -0
  133. data/app/lib/atlas_engine/validation_transcriber/address_parser_factory.rb +27 -0
  134. data/app/lib/atlas_engine/validation_transcriber/address_parser_north_america.rb +39 -0
  135. data/app/lib/atlas_engine/validation_transcriber/address_parser_oceanic.rb +17 -0
  136. data/app/lib/atlas_engine/validation_transcriber/address_parser_preprocessor.rb +132 -0
  137. data/app/lib/atlas_engine/validation_transcriber/address_parsing_helper.rb +38 -0
  138. data/app/lib/atlas_engine/validation_transcriber/address_parsings.rb +54 -0
  139. data/app/lib/atlas_engine/validation_transcriber/constants.rb +50 -0
  140. data/app/lib/atlas_engine/validation_transcriber/english_street_parser.rb +59 -0
  141. data/app/lib/atlas_engine/validation_transcriber/formatter.rb +46 -0
  142. data/app/lib/atlas_engine/validation_transcriber/french_street_parser.rb +50 -0
  143. data/app/lib/atlas_engine/validation_transcriber/province_code_normalizer.rb +45 -0
  144. data/app/lib/atlas_engine/validation_transcriber/street_parser.rb +18 -0
  145. data/app/lib/atlas_engine/validation_transcriber/zip_normalizer.rb +23 -0
  146. data/app/mailers/atlas_engine/application_mailer.rb +9 -0
  147. data/app/models/atlas_engine/address_importer/corrections/corrector.rb +33 -0
  148. data/app/models/atlas_engine/address_importer/import_events_notifier/base.rb +35 -0
  149. data/app/models/atlas_engine/address_importer/import_events_notifier/notifier.rb +26 -0
  150. data/app/models/atlas_engine/address_importer/open_address/default_mapper.rb +46 -0
  151. data/app/models/atlas_engine/address_importer/open_address/feature_helper.rb +110 -0
  152. data/app/models/atlas_engine/address_importer/open_address/filter.rb +17 -0
  153. data/app/models/atlas_engine/address_importer/open_address/loader.rb +27 -0
  154. data/app/models/atlas_engine/address_importer/open_address/transformer.rb +39 -0
  155. data/app/models/atlas_engine/address_importer/open_address.rb +10 -0
  156. data/app/models/atlas_engine/address_importer/validation/base_validator.rb +86 -0
  157. data/app/models/atlas_engine/address_importer/validation/default_validator.rb +27 -0
  158. data/app/models/atlas_engine/address_importer/validation/field_validations/city.rb +47 -0
  159. data/app/models/atlas_engine/address_importer/validation/field_validations/interface.rb +29 -0
  160. data/app/models/atlas_engine/address_importer/validation/field_validations/province.rb +73 -0
  161. data/app/models/atlas_engine/address_importer/validation/field_validations/zip.rb +84 -0
  162. data/app/models/atlas_engine/address_importer/validation/validator.rb +17 -0
  163. data/app/models/atlas_engine/address_importer/validation/wrapper.rb +70 -0
  164. data/app/models/atlas_engine/address_number.rb +36 -0
  165. data/app/models/atlas_engine/address_number_range.rb +200 -0
  166. data/app/models/atlas_engine/address_validation/abstract_address.rb +49 -0
  167. data/app/models/atlas_engine/address_validation/address.rb +47 -0
  168. data/app/models/atlas_engine/address_validation/candidate.rb +109 -0
  169. data/app/models/atlas_engine/address_validation/candidate_tuple.rb +15 -0
  170. data/app/models/atlas_engine/address_validation/concern.rb +74 -0
  171. data/app/models/atlas_engine/address_validation/concern_producer.rb +19 -0
  172. data/app/models/atlas_engine/address_validation/concern_queue.rb +20 -0
  173. data/app/models/atlas_engine/address_validation/concern_record.rb +122 -0
  174. data/app/models/atlas_engine/address_validation/datastore_base.rb +27 -0
  175. data/app/models/atlas_engine/address_validation/errors.rb +13 -0
  176. data/app/models/atlas_engine/address_validation/es/candidate_selector.rb +70 -0
  177. data/app/models/atlas_engine/address_validation/es/data_mappers/decompounding_data_mapper.rb +39 -0
  178. data/app/models/atlas_engine/address_validation/es/data_mappers/default_data_mapper.rb +110 -0
  179. data/app/models/atlas_engine/address_validation/es/datastore.rb +229 -0
  180. data/app/models/atlas_engine/address_validation/es/default_query_builder.rb +30 -0
  181. data/app/models/atlas_engine/address_validation/es/query_builder.rb +160 -0
  182. data/app/models/atlas_engine/address_validation/es/term_vectors.rb +78 -0
  183. data/app/models/atlas_engine/address_validation/es/validators/full_address.rb +123 -0
  184. data/app/models/atlas_engine/address_validation/es/validators/full_address_street.rb +18 -0
  185. data/app/models/atlas_engine/address_validation/es/validators/restriction_evaluator.rb +37 -0
  186. data/app/models/atlas_engine/address_validation/field.rb +30 -0
  187. data/app/models/atlas_engine/address_validation/full_address_validator_base.rb +27 -0
  188. data/app/models/atlas_engine/address_validation/log_emitter.rb +66 -0
  189. data/app/models/atlas_engine/address_validation/matching_strategies.rb +16 -0
  190. data/app/models/atlas_engine/address_validation/normalizer.rb +38 -0
  191. data/app/models/atlas_engine/address_validation/predicate_pipeline.rb +80 -0
  192. data/app/models/atlas_engine/address_validation/request.rb +12 -0
  193. data/app/models/atlas_engine/address_validation/result.rb +154 -0
  194. data/app/models/atlas_engine/address_validation/runs_validation.rb +16 -0
  195. data/app/models/atlas_engine/address_validation/session.rb +47 -0
  196. data/app/models/atlas_engine/address_validation/statsd_emitter.rb +72 -0
  197. data/app/models/atlas_engine/address_validation/strategies.rb +10 -0
  198. data/app/models/atlas_engine/address_validation/suggestion.rb +97 -0
  199. data/app/models/atlas_engine/address_validation/token/comparator.rb +44 -0
  200. data/app/models/atlas_engine/address_validation/token/comparison.rb +76 -0
  201. data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +158 -0
  202. data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +166 -0
  203. data/app/models/atlas_engine/address_validation/token/sequence.rb +147 -0
  204. data/app/models/atlas_engine/address_validation/token/synonyms.rb +77 -0
  205. data/app/models/atlas_engine/address_validation/token.rb +113 -0
  206. data/app/models/atlas_engine/address_validation/validator.rb +147 -0
  207. data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +97 -0
  208. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +164 -0
  209. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result_base.rb +46 -0
  210. data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +135 -0
  211. data/app/models/atlas_engine/address_validation/validators/full_address/components_to_validate.rb +88 -0
  212. data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +127 -0
  213. data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +23 -0
  214. data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_concern_builder.rb +42 -0
  215. data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_country_concern.rb +37 -0
  216. data/app/models/atlas_engine/address_validation/validators/full_address/invalid_zip_for_province_concern.rb +37 -0
  217. data/app/models/atlas_engine/address_validation/validators/full_address/no_candidate_result.rb +26 -0
  218. data/app/models/atlas_engine/address_validation/validators/full_address/number_comparison.rb +31 -0
  219. data/app/models/atlas_engine/address_validation/validators/full_address/postal_code_matcher.rb +60 -0
  220. data/app/models/atlas_engine/address_validation/validators/full_address/result_updater.rb +42 -0
  221. data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +140 -0
  222. data/app/models/atlas_engine/address_validation/validators/full_address/unknown_address_concern.rb +30 -0
  223. data/app/models/atlas_engine/address_validation/validators/full_address/unknown_province_concern.rb +38 -0
  224. data/app/models/atlas_engine/address_validation/validators/full_address/unknown_zip_for_address_concern.rb +32 -0
  225. data/app/models/atlas_engine/address_validation/validators/full_address/unmatched_field_concern.rb +84 -0
  226. data/app/models/atlas_engine/address_validation/validators/full_address/unsupported_script_result.rb +22 -0
  227. data/app/models/atlas_engine/address_validation/validators/predicates/cache.rb +38 -0
  228. data/app/models/atlas_engine/address_validation/validators/predicates/city/present.rb +36 -0
  229. data/app/models/atlas_engine/address_validation/validators/predicates/country/exists.rb +34 -0
  230. data/app/models/atlas_engine/address_validation/validators/predicates/country/valid_for_zip.rb +60 -0
  231. data/app/models/atlas_engine/address_validation/validators/predicates/no_emojis.rb +38 -0
  232. data/app/models/atlas_engine/address_validation/validators/predicates/no_html_tags.rb +39 -0
  233. data/app/models/atlas_engine/address_validation/validators/predicates/no_url.rb +38 -0
  234. data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_length.rb +34 -0
  235. data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_token_count.rb +63 -0
  236. data/app/models/atlas_engine/address_validation/validators/predicates/phone/valid.rb +41 -0
  237. data/app/models/atlas_engine/address_validation/validators/predicates/predicate.rb +37 -0
  238. data/app/models/atlas_engine/address_validation/validators/predicates/province/exists.rb +43 -0
  239. data/app/models/atlas_engine/address_validation/validators/predicates/province/valid_for_country.rb +48 -0
  240. data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1.rb +45 -0
  241. data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +43 -0
  242. data/app/models/atlas_engine/address_validation/validators/predicates/street/present.rb +35 -0
  243. data/app/models/atlas_engine/address_validation/validators/predicates/zip/present.rb +58 -0
  244. data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_country.rb +45 -0
  245. data/app/models/atlas_engine/address_validation/validators/predicates/zip/valid_for_province.rb +55 -0
  246. data/app/models/atlas_engine/address_validation/validators/predicates/zip/zip_base.rb +25 -0
  247. data/app/models/atlas_engine/address_validation/zip_truncator.rb +32 -0
  248. data/app/models/atlas_engine/application_record.rb +8 -0
  249. data/app/models/atlas_engine/coded_error.rb +18 -0
  250. data/app/models/atlas_engine/coded_errors.rb +17 -0
  251. data/app/models/atlas_engine/country_import.rb +44 -0
  252. data/app/models/atlas_engine/country_profile.rb +270 -0
  253. data/app/models/atlas_engine/country_profile_ingestion_subset.rb +42 -0
  254. data/app/models/atlas_engine/country_profile_subset_base.rb +22 -0
  255. data/app/models/atlas_engine/country_profile_validation_subset.rb +48 -0
  256. data/app/models/atlas_engine/country_repository.rb +110 -0
  257. data/app/models/atlas_engine/elasticsearch/client.rb +116 -0
  258. data/app/models/atlas_engine/elasticsearch/client_interface.rb +89 -0
  259. data/app/models/atlas_engine/elasticsearch/repository.rb +246 -0
  260. data/app/models/atlas_engine/elasticsearch/repository_interface.rb +82 -0
  261. data/app/models/atlas_engine/elasticsearch/response.rb +20 -0
  262. data/app/models/atlas_engine/event.rb +12 -0
  263. data/app/models/atlas_engine/field_decompounder.rb +36 -0
  264. data/app/models/atlas_engine/index_configuration_factory.rb +188 -0
  265. data/app/models/atlas_engine/post_address.rb +114 -0
  266. data/app/models/atlas_engine/post_address_importer.rb +34 -0
  267. data/app/models/atlas_engine/services/service_helper.rb +21 -0
  268. data/app/models/atlas_engine/services/validation.rb +65 -0
  269. data/app/models/atlas_engine/services/validation_eligibility.rb +18 -0
  270. data/app/models/atlas_engine/street.rb +34 -0
  271. data/app/tasks/maintenance/atlas_engine/elasticsearch_index_create_task.rb +106 -0
  272. data/app/tasks/maintenance/atlas_engine/geo_json_import_task.rb +29 -0
  273. data/app/views/atlas_engine/connectivity/index.html.erb +50 -0
  274. data/app/views/atlas_engine/country_imports/index.html.erb +49 -0
  275. data/app/views/atlas_engine/country_imports/show.html.erb +73 -0
  276. data/app/views/layouts/atlas_engine/application.html.erb +15 -0
  277. data/config/initializers/1.ruby_patches.rb +18 -0
  278. data/config/initializers/sorbet.rb +5 -0
  279. data/config/initializers/worldwide.rb +5 -0
  280. data/config/locales/internal/en.yml +14 -0
  281. data/config/routes.rb +17 -0
  282. data/db/data/address_synonyms/index_configurations/default.yml +141 -0
  283. data/db/data/country_profiles/default.yml +23 -0
  284. data/db/data/transcriber.yml +760 -0
  285. data/db/data/validation_pipelines/es.yml +58 -0
  286. data/db/data/validation_pipelines/es_street.yml +58 -0
  287. data/db/data/validation_pipelines/local.yml +60 -0
  288. data/db/migrate/20230919173037_create_atlas_engine_post_addresses.rb +25 -0
  289. data/db/migrate/20231117142735_add_building_and_unit_ranges_column.rb +7 -0
  290. data/db/migrate/20231117143536_create_atlas_engine_country_imports.rb +11 -0
  291. data/db/migrate/20231117145844_create_atlas_engine_events_table.rb +13 -0
  292. data/db/migrate/20231123153554_add_unique_index_to_atlas_engine_post_addresses.rb +14 -0
  293. data/db/migrate/20231123154658_add_index_to_post_addresses_on_source_id_locale_country_code.rb +12 -0
  294. data/lib/atlas_engine/engine.rb +10 -0
  295. data/lib/atlas_engine/version.rb +6 -0
  296. data/lib/atlas_engine.rb +66 -0
  297. data/lib/tasks/atlas_engine/address_importer.rake +20 -0
  298. metadata +553 -0
@@ -0,0 +1,44 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ require "rubygems/text"
5
+
6
module AtlasEngine
  module AddressValidation
    class Token
      # Compares the values of two tokens and wraps the outcome in a
      # Token::Comparison carrying a qualifier and a Levenshtein edit distance.
      class Comparator
        extend T::Sig
        include Gem::Text

        sig { returns(Token) }
        attr_reader :left, :right

        # @param left_token [Token] token exposed afterwards as +left+
        # @param right_token [Token] token exposed afterwards as +right+
        sig { params(left_token: Token, right_token: Token).void }
        def initialize(left_token, right_token)
          @left = T.let(left_token, Token)
          @right = T.let(right_token, Token)
        end

        # Classifies how the two token values relate to each other.
        #
        # Identical values yield an :equal comparison with edit distance 0.
        # Otherwise the edit distance is computed and the qualifier is :prefix
        # when either value starts with the other, :suffix when either value
        # ends with the other, and :comp in every remaining case. The prefix
        # check takes precedence over the suffix check.
        #
        # @return [Comparison]
        sig { returns(Comparison) }
        def compare
          lhs = left.value
          rhs = right.value

          return Comparison.new(left: left, right: right, qualifier: :equal, edit_distance: 0) if lhs == rhs

          distance = levenshtein_distance(lhs, rhs)

          qualifier =
            if rhs.start_with?(lhs) || lhs.start_with?(rhs)
              :prefix
            elsif rhs.end_with?(lhs) || lhs.end_with?(rhs)
              :suffix
            else
              :comp
            end

          Comparison.new(left: left, right: right, qualifier: qualifier, edit_distance: distance)
        end
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressValidation
    class Token
      # Result of comparing a left token with a right token: the two tokens, a
      # qualifier describing how they relate, and their edit distance.
      #
      # Instances are Comparable: a lower edit distance sorts first, and ties
      # are broken by the qualifier's position in QUALIFIERS.
      class Comparison
        extend T::Sig
        include Comparable

        # Accepted qualifiers, in the order used to break edit-distance ties.
        QUALIFIERS = T.let(
          %i[equal prefix suffix comp].freeze,
          T::Array[Symbol],
        )

        sig { returns(Token) }
        attr_reader :left, :right

        sig { returns(Symbol) }
        attr_reader :qualifier

        sig { returns(Integer) }
        attr_reader :edit_distance

        # @param left [Token]
        # @param right [Token]
        # @param qualifier [Symbol] must be one of QUALIFIERS
        # @param edit_distance [Integer]
        # @raise [RuntimeError] when +qualifier+ is not listed in QUALIFIERS
        sig do
          params(
            left: Token,
            right: Token,
            qualifier: Symbol,
            edit_distance: Integer,
          ).void
        end
        def initialize(left:, right:, qualifier:, edit_distance:)
          raise "Unknown qualifier" unless QUALIFIERS.include?(qualifier)

          @left = left
          @right = right
          @qualifier = qualifier
          @edit_distance = edit_distance
        end

        # Orders by edit distance first and by qualifier rank second.
        sig { params(other: Comparison).returns(Integer) }
        def <=>(other)
          by_distance = edit_distance <=> other.edit_distance
          return by_distance unless by_distance == 0

          qualifier_rank <=> other.qualifier_rank
        end

        # True when the qualifier is :equal.
        # NOTE(review): this zero-argument predicate replaces the one-argument
        # identity check Ruby objects normally expose under the same name.
        sig { returns(T::Boolean) }
        def equal?
          :equal == qualifier
        end

        # True when both the left and the right token of this comparison
        # precede the corresponding tokens of +other+, as decided by
        # Token#preceeds?.
        sig { params(other: Comparison).returns(T::Boolean) }
        def preceeds?(other)
          left.preceeds?(other.left) && right.preceeds?(other.right)
        end

        # Compact debugging representation of the comparison.
        sig { returns(String) }
        def inspect
          label = qualifier.to_s.upcase
          "<comp left:#{left.inspect} #{label} right:#{right.inspect} edit:#{edit_distance}/>"
        end

        protected

        # Position of the qualifier within QUALIFIERS; a lower rank sorts first.
        sig { returns(Integer) }
        def qualifier_rank
          # The constructor rejects qualifiers that are not in QUALIFIERS, so
          # the lookup cannot come back nil.
          T.must(QUALIFIERS.index(qualifier))
        end
      end
    end
  end
end
@@ -0,0 +1,158 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
module AtlasEngine
  module AddressValidation
    class Token
      class Sequence
        # Compares two token sequences. Every left permutation is compared with
        # every right permutation, and the smallest resulting
        # Sequence::Comparison (per its own ordering) is returned.
        class Comparator
          extend T::Sig

          sig { returns(Sequence) }
          attr_reader :left, :right

          # Memoizes Token::Comparison results keyed by [left_token, right_token].
          attr_reader :comparison_cache

          # A non-prefix token pair only counts as a match when its edit
          # distance is at most this fraction of the longer token's length.
          MAX_ALLOWED_EDIT_DISTANCE_PERCENT = 0.5

          # @param left_sequence [Sequence]
          # @param right_sequence [Sequence]
          sig { params(left_sequence: Sequence, right_sequence: Sequence).void }
          def initialize(left_sequence:, right_sequence:)
            @left = left_sequence
            @right = right_sequence
            # Token pairs are compared lazily, on first lookup, and then reused
            # across permutations.
            @comparison_cache = Hash.new do |cache, (left_token, right_token)|
              cache[[left_token, right_token]] =
                AddressValidation::Token::Comparator.new(left_token, right_token).compare
            end
          end

          # @return [Comparison] the minimum comparison over all permutation pairs
          sig { returns(Comparison) }
          def compare
            left_permutations = left.permutations
            right_permutations = right.permutations

            candidates = left_permutations.flat_map do |left_permutation|
              right_permutations.map do |right_permutation|
                flattened_sequence_compare(left_permutation, right_permutation)
              end
            end

            T.must(candidates.min)
          end

          private

          # Token comparisons for every left/right token pair, left-major order.
          sig do
            params(
              left_permutations: T::Array[Token],
              right_permutations: T::Array[Token],
            ).returns(T::Array[Token::Comparison])
          end
          def token_comparisons(left_permutations, right_permutations)
            left_permutations.flat_map do |left_token|
              right_permutations.map { |right_token| comparison_cache[[left_token, right_token]] }
            end
          end

          # Sorts comparisons by their own ordering; ties are broken by the
          # sum of the left and right token positions.
          sig do
            params(
              token_comparisons: T::Array[Token::Comparison],
            ).returns(T::Array[Token::Comparison])
          end
          def sort_token_comparisons(token_comparisons)
            token_comparisons.sort do |a, b|
              (a <=> b).nonzero? ||
                ((a.left.position + a.right.position) <=> (b.left.position + b.right.position))
            end
          end

          # Builds the Sequence::Comparison for one pair of permutations.
          #
          # Candidates are consumed best-first. Each one is kept only if it
          # passes the edit-distance test, and whether kept or not, every other
          # candidate sharing its left or right token (or that token's position)
          # is discarded.
          sig do
            params(
              left_permutation: T::Array[Token],
              right_permutation: T::Array[Token],
            ).returns(Sequence::Comparison)
          end
          def flattened_sequence_compare(left_permutation, right_permutation)
            pending = sort_token_comparisons(token_comparisons(left_permutation, right_permutation))
            accepted = []

            while (best = pending.shift)
              accepted << best if tokens_match_by_edit_distance?(comparison: best)

              pending.reject! do |candidate|
                same_token_or_position?(candidate.left, best.left) ||
                  same_token_or_position?(candidate.right, best.right)
              end
            end

            # Present the accepted matches in left-token position order.
            ordered_matches = accepted.sort do |first, second|
              first.left.position <=> second.left.position
            end

            Comparison.new(
              unmatched_tokens: unmatched_tokens(left_permutation, right_permutation, ordered_matches),
              token_comparisons: ordered_matches,
              left_sequence: left,
              right_sequence: right,
            )
          end

          # True for prefix comparisons, or when the edit distance relative to
          # the longer token value does not exceed
          # MAX_ALLOWED_EDIT_DISTANCE_PERCENT.
          sig { params(comparison: AddressValidation::Token::Comparison).returns(T::Boolean) }
          def tokens_match_by_edit_distance?(comparison:)
            return true if comparison.qualifier == :prefix

            longest_length = [comparison.left.value.length, comparison.right.value.length].max
            comparison.edit_distance.to_f / longest_length <= MAX_ALLOWED_EDIT_DISTANCE_PERCENT
          end

          # Tokens from either side that take part in none of the given
          # comparisons: left leftovers first, then right leftovers. Each side
          # has its redundant synonyms removed.
          sig do
            params(
              left_tokens: T::Array[Token],
              right_tokens: T::Array[Token],
              comparisons: T::Array[Token::Comparison],
            ).returns(T::Array[Token])
          end
          def unmatched_tokens(left_tokens, right_tokens, comparisons)
            leftover_left = left_tokens.reject do |token|
              comparisons.any? { |comparison| same_token_or_position?(comparison.left, token) }
            end
            leftover_left = remove_synonyms_at_same_position(leftover_left)

            leftover_right = right_tokens.reject do |token|
              comparisons.any? { |comparison| same_token_or_position?(comparison.right, token) }
            end
            leftover_right = remove_synonyms_at_same_position(leftover_right)

            leftover_left + leftover_right
          end

          # True when the tokens are equal, or when they share both offset
          # range and position.
          sig { params(token: Token, other_token: Token).returns(T::Boolean) }
          def same_token_or_position?(token, other_token)
            if token == other_token
              true
            else
              token.offset_range == other_token.offset_range && token.position == other_token.position
            end
          end

          # Within each group of tokens sharing a position, drops the tokens
          # typed "SYNONYM" whenever the group holds more than one token.
          sig { params(tokens: T::Array[Token]).returns(T::Array[Token]) }
          def remove_synonyms_at_same_position(tokens)
            tokens.group_by(&:position).values.flat_map do |group|
              group.size > 1 ? group.reject { |token| token.type == "SYNONYM" } : group
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,166 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module AddressValidation
6
+ class Token
7
+ class Sequence
8
# The outcome of comparing two token sequences: the token-level comparisons that were paired up,
# plus the tokens that could not be paired. Instances are Comparable; a "smaller" instance is a
# better comparison (see #<=>).
class Comparison
  extend T::Sig
  include Comparable

  # Default minimum ratio used by #potential_match? for both matched tokens and matched length.
  DEFAULT_PARTIAL_MATCH_THRESHOLD_PERCENT = 0.5

  attr_reader :unmatched_tokens, :left_sequence, :right_sequence, :token_comparisons

  sig do
    params(
      unmatched_tokens: T::Array[Token],
      token_comparisons: T::Array[Token::Comparison],
      left_sequence: T.nilable(Sequence),
      right_sequence: T.nilable(Sequence),
    ).void
  end
  def initialize(unmatched_tokens:, token_comparisons:, left_sequence:, right_sequence:)
    @unmatched_tokens = unmatched_tokens
    @token_comparisons = token_comparisons
    @left_sequence = left_sequence
    @right_sequence = right_sequence
  end

  # Ranks this comparison against +other+; a negative result means self is the better one.
  # Criteria are applied in this order, stopping at the first that differs:
  #   1. more :equal token comparisons is better
  #   2. fewer unmatched tokens is better
  #   3. a longer run of consecutive equal tokens is better (more such runs breaks ties)
  #   4. a lower aggregate edit distance is better
  #   5. more :prefix comparisons is better
  #   6. more :suffix comparisons is better
  sig { params(other: Comparison).returns(Integer) }
  def <=>(other)
    matches = count_by_qualifier(:equal) <=> other.count_by_qualifier(:equal)
    return matches * -1 if matches.nonzero?

    unmatched = unmatched_tokens.size <=> other.unmatched_tokens.size
    return unmatched if unmatched.nonzero?

    longest_subsequence = longest_subsequence_comparison <=> other.longest_subsequence_comparison
    return -1 * longest_subsequence if longest_subsequence.nonzero?

    edit_distance = aggregate_edit_distance <=> other.aggregate_edit_distance
    return edit_distance if edit_distance.nonzero?

    prefixes = count_by_qualifier(:prefix) <=> other.count_by_qualifier(:prefix)
    return prefixes * -1 if prefixes.nonzero?

    (count_by_qualifier(:suffix) <=> other.count_by_qualifier(:suffix)) * -1
  end

  # Debug representation listing the unmatched tokens and one token comparison per line.
  sig { returns(String) }
  def inspect
    parts = ["["]
    token_comparisons.each do |comparison|
      parts << "\n#{comparison.inspect}"
    end
    parts << "\n" unless token_comparisons.empty?
    parts << "]"
    "<seqcomp unmatched:#{unmatched_tokens.inspect} comp:#{parts.join}/>"
  end

  # True when self ranks strictly ahead of +other_comparison+ according to #<=>.
  sig { params(other_comparison: Comparison).returns(T::Boolean) }
  def better_than?(other_comparison)
    self < other_comparison
  end

  # True when self ranks strictly behind +other_comparison+ according to #<=>.
  sig { params(other_comparison: Comparison).returns(T::Boolean) }
  def worse_than?(other_comparison)
    self > other_comparison
  end

  # True when #<=> cannot tell the two comparisons apart.
  sig { params(other_comparison: Comparison).returns(T::Boolean) }
  def equivalent_to?(other_comparison)
    self == other_comparison
  end

  # Builds a new comparison combining both operands: unmatched tokens are concatenated and token
  # comparisons are concatenated then de-duplicated. A side's sequence is carried over only when
  # both operands reference the very same sequence object; otherwise it becomes nil.
  sig { params(other_comparison: Comparison).returns(Comparison) }
  def merge(other_comparison)
    AddressValidation::Token::Sequence::Comparison.new(
      unmatched_tokens: unmatched_tokens + other_comparison.unmatched_tokens,
      token_comparisons: (token_comparisons + other_comparison.token_comparisons).uniq,
      left_sequence: left_sequence.equal?(other_comparison.left_sequence) ? left_sequence : nil,
      right_sequence: right_sequence.equal?(other_comparison.right_sequence) ? right_sequence : nil,
    )
  end

  # An exact match: no edits were needed and nothing was left unmatched.
  sig { returns(T::Boolean) }
  def match?
    aggregate_edit_distance == 0 && unmatched_tokens.empty?
  end

  # A partial match: both the matched-token ratio and the matched-length ratio reach
  # +threshold_percent+.
  sig { params(threshold_percent: Float).returns(T::Boolean) }
  def potential_match?(threshold_percent: DEFAULT_PARTIAL_MATCH_THRESHOLD_PERCENT)
    matched_tokens_percent >= threshold_percent && matched_length_percent >= threshold_percent
  end

  # Sum of the edit distances of all token comparisons.
  sig { returns(Integer) }
  def aggregate_edit_distance
    token_comparisons.sum(&:edit_distance)
  end

  # Number of token comparisons (paired tokens), whatever their qualifier.
  sig { returns(Integer) }
  def token_match_count
    token_comparisons.size
  end

  protected

  # Number of token comparisons whose qualifier equals +qualifier+.
  sig { params(qualifier: Symbol).returns(Integer) }
  def count_by_qualifier(qualifier)
    token_comparisons.count { |comparison| comparison.qualifier == qualifier }
  end

  # Pair used by #<=>: the longest run length, then how many runs reach that length.
  sig { returns([Integer, Integer]) }
  def longest_subsequence_comparison
    max_subsequence_length = subsequence_lengths.max || 0
    # max length, number of times we saw a subsequence of max length (acts as a tiebreaker)
    [max_subsequence_length, subsequence_lengths.count(max_subsequence_length)]
  end

  private

  # Lengths of the runs of consecutive equal token comparisons, ignoring runs of a single pair.
  sig { returns(T::Array[Integer]) }
  def subsequence_lengths
    # Measure the length of consecutive pairs of equal tokens. The position of both compared tokens
    # must increase by 1 relative to the preceding AddressValidation::Token::Comparison's pair.
    # Memoized: the result is reused on later calls, which assumes token_comparisons is not
    # mutated after construction.
    @subsequence_lengths ||= equal_token_comparisons
      .chunk_while { |token_comp, next_token_comp| token_comp.preceeds?(next_token_comp) }
      .map(&:length)
      .select { |length| length > 1 } # trivial sequences of length 1 are ignored
  end

  # Token comparisons answering true to their own zero-argument #equal? predicate.
  # NOTE(review): relies on Token::Comparison defining #equal? without arguments - confirm.
  sig { returns(T::Array[Token::Comparison]) }
  def equal_token_comparisons
    token_comparisons.select(&:equal?)
  end

  # Share of tokens that were paired up, rounded to 2 decimals. Each token comparison counts for
  # two tokens (one per side).
  sig { returns(Float) }
  def matched_tokens_percent
    matched_tokens_count = token_match_count * 2
    unmatched_tokens_count = unmatched_tokens.size
    (matched_tokens_count.to_f / (matched_tokens_count + unmatched_tokens_count)).round(2)
  end

  # Share of characters that matched, rounded to 2 decimals. A pair contributes the combined length
  # of its two values minus its edit distance; the denominator adds the unmatched tokens' lengths
  # and the aggregate edit distance.
  sig { returns(Float) }
  def matched_length_percent
    matched_length = token_comparisons.sum do |token_pair|
      token_pair.left.value.length + token_pair.right.value.length - token_pair.edit_distance
    end
    unmatched_length = unmatched_tokens.sum { |token| token.value.length }

    (matched_length.to_f / (matched_length + unmatched_length + aggregate_edit_distance)).round(2)
  end
end
163
+ end
164
+ end
165
+ end
166
+ end
@@ -0,0 +1,147 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module AddressValidation
6
+ class Token
7
# An ordered list of tokens built from a string. Tokens whose offsets overlap are wrapped
# together in a Synonyms entry.
class Sequence
  extend T::Sig

  class << self
    extend T::Sig
    include Normalizer
    # A segment is kept as a token only if its normalized form contains an alphanumeric character.
    ACCEPTABLE_CHARACTERS = /\p{Alnum}/

    # Splits +string+ into word segments and builds a Sequence from the segments worth keeping.
    # Offsets advance over every segment, kept or not, while positions advance only for kept
    # tokens. A token is typed "<NUM>" when the raw segment parses as a number, and "<ALPHANUM>"
    # otherwise.
    sig { params(string: T.nilable(String)).returns(Sequence) }
    def from_string(string)
      start_offset = 0
      end_offset = 0
      position = 0

      tokens = Annex29.segment_words(string).filter_map do |substring|
        start_offset = end_offset
        end_offset = start_offset + substring.length

        normalized_substring = normalize(substring)
        # annex 29 returns whitespace and punctuation as separate substrings
        next unless normalized_substring.match?(ACCEPTABLE_CHARACTERS)

        token = Token.new(
          value: normalized_substring,
          start_offset: start_offset,
          end_offset: end_offset,
          position: position,
          type: number?(substring) ? "<NUM>" : "<ALPHANUM>",
        )

        position += 1

        token
      end

      new(tokens: tokens, raw_value: string)
    end

    # True when Kernel#Float can parse +string+. Uses `exception: false` so unparseable input
    # yields false without raising and rescuing.
    def number?(string)
      !Float(string, exception: false).nil?
    end
  end

  TokenOrSynonyms = T.type_alias { T.any(Token, Synonyms) }

  sig { returns(T::Array[TokenOrSynonyms]) }
  attr_reader :tokens

  sig { returns(T.nilable(String)) }
  attr_reader :raw_value

  # Sorbet can't handle delegates https://github.com/sorbet/sorbet/issues/4794
  # rubocop:disable Rails/Delegate
  sig { returns(T::Boolean) }
  def empty? = tokens.empty?

  sig { returns(Integer) }
  def size = tokens.size

  sig { returns(Integer) }
  def length = tokens.length
  # rubocop:enable Rails/Delegate

  # Groups +tokens+ by overlapping offsets; a group of one stays a plain Token, while a larger
  # group becomes a Synonyms entry.
  sig { params(tokens: T::Array[Token], raw_value: T.nilable(String)).void }
  def initialize(tokens: [], raw_value: nil)
    @raw_value = raw_value
    @tokens = group_by_overlapping_offsets(tokens)
      .map { |tkns| tkns.one? ? T.must(tkns.first) : Synonyms.new(tokens: tkns) }
  end

  sig { returns(String) }
  def inspect
    "<seq #{tokens.inspect}/>"
  end

  # Every flat token list obtainable by expanding the Synonyms entries of this sequence.
  sig { returns(T::Array[T::Array[Token]]) }
  def permutations = recursive_permutations(tokens)

  # Two sequences are equal when their token lists are equal; raw_value is not compared.
  def ==(other)
    return false unless other.is_a?(Sequence)

    tokens == other.tokens
  end

  private

  # Expands the first Synonyms entry found in +token_array+, then recurses on the result until
  # only plain tokens remain.
  sig { params(token_array: T::Array[TokenOrSynonyms]).returns(T::Array[T::Array[Token]]) }
  def recursive_permutations(token_array)
    # we bottom out when token_array contains only simple tokens
    next_synonyms_index = token_array.find_index { |entry| entry.is_a?(Synonyms) }
    # There are no synonyms in that array, cast is safe
    return [T.cast(token_array, T::Array[Token])] unless next_synonyms_index

    new_tokens = token_array.dup
    synonyms = T.cast(new_tokens[next_synonyms_index], Synonyms)
    new_tokens.delete_at(next_synonyms_index)

    if synonyms.multi_token?
      # token_array (before synonyms object was deleted): [a, b, <syn [afb, [air, force, base]]/>, ...rest]
      # output: [[a, b, afb, ...rest], [a, b, air, force, base, ...rest]]
      synonyms.tokens.flat_map do |multi_token_entry|
        current_permutation = T.unsafe(new_tokens).dup.insert(next_synonyms_index, *Array(multi_token_entry))
        # ...rest will be handled recursively
        recursive_permutations(current_permutation)
      end
    else
      # token_array (before synonyms object was deleted): [a, b, <syn [st, street, saint/>, ...rest]
      # output: [[a, b, st, street, saint, ...rest]]
      T.unsafe(new_tokens).insert(next_synonyms_index, *synonyms.tokens)
      # ...rest will be handled recursively
      recursive_permutations(new_tokens)
    end
  end

  # Splits +tokens+, sorted by position, into consecutive groups. A token joins the current group
  # while the offset range of that group's first token covers its own offset range; otherwise it
  # starts a new group.
  sig { params(tokens: T::Array[Token]).returns(T::Array[T::Array[Token]]) }
  def group_by_overlapping_offsets(tokens)
    return [] if tokens.empty?

    sorted_tokens = tokens.stable_sort_by(&:position)
    current_range = sorted_tokens.first&.offset_range

    groups = []
    current_group = []
    sorted_tokens.each do |token|
      if current_range.cover?(token.offset_range)
        current_group << token
      else
        groups << current_group
        current_group = [token]
        current_range = token.offset_range
      end
    end

    groups << current_group
  end
end
145
+ end
146
+ end
147
+ end
@@ -0,0 +1,77 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module AddressValidation
6
+ class Token
7
# A set of alternative tokens occupying the same place in a sequence. It answers some of the
# same messages as a Token (position, offset_range, value, type).
class Synonyms
  extend T::Sig

  TokenList = T.type_alias { T::Array[Token] }

  # Each entry is either a single Token or a list of Tokens that chain one after the other.
  sig { returns(T::Array[T.any(Token, TokenList)]) }
  attr_reader :tokens

  # Sorbet can't handle delegates https://github.com/sorbet/sorbet/issues/4794
  # rubocop:disable Rails/Delegate
  sig { returns(Integer) }
  def position
    first_token.position
  end

  sig { returns(T::Range[Integer]) }
  def offset_range
    first_token.offset_range
  end
  # rubocop:enable Rails/Delegate

  # Arranges +tokens+ into entries. Tokens are bucketed by position; each pass starts at the
  # lowest remaining position, takes one token from it, jumps ahead by that token's
  # position_length and keeps taking tokens for as long as a bucket exists at the position
  # reached. A pass that collected one token stores it as-is; otherwise the collected chain is
  # stored as an array.
  sig { params(tokens: T::Array[Token]).void }
  def initialize(tokens: [])
    raise ArgumentError, "Synonyms cannot be empty" if tokens.empty?

    @tokens = []
    pending_by_position = tokens.stable_sort_by(&:position).group_by(&:position)

    until pending_by_position.values.none?(&:present?)
      chain = []
      cursor = pending_by_position.keys.first

      while pending_by_position.key?(cursor)
        link = T.must(pending_by_position[cursor]).shift
        chain << link
        cursor += T.must(link).position_length
      end

      entry = chain.one? ? chain.first : chain
      @tokens << entry
      pending_by_position.compact_blank! # remove positions having no tokens
    end
  end

  sig { returns(String) }
  def inspect
    "<syn #{tokens.inspect}/>"
  end

  # A Synonyms entry carries no value of its own.
  sig { returns(NilClass) }
  def value = nil

  sig { returns(String) }
  def type = "<SYNONYMS>"

  # True when at least one entry is a chain of several tokens rather than a single token.
  sig { returns(T::Boolean) }
  def multi_token?
    tokens.any? { |entry| entry.is_a?(Array) }
  end

  private

  # The first token of the first entry, looking inside the entry when it is a chain.
  sig { returns(Token) }
  def first_token
    leading_entry = T.must(tokens.first)
    return leading_entry unless leading_entry.is_a?(Array)

    T.must(leading_entry.first)
  end
end
75
+ end
76
+ end
77
+ end