geohydra 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (687) hide show
  1. data/.gitignore +23 -0
  2. data/.gitmodules +3 -0
  3. data/.travis.yml +7 -0
  4. data/Gemfile +15 -0
  5. data/Gemfile.lock +281 -0
  6. data/LICENSE +14 -0
  7. data/README.md +187 -0
  8. data/Rakefile +34 -0
  9. data/VERSION +1 -0
  10. data/bin/accession.rb +109 -0
  11. data/bin/assemble.rb +307 -0
  12. data/bin/assemble_data.rb +66 -0
  13. data/bin/assemble_placenames.rb +110 -0
  14. data/bin/build_stage_options.rb +27 -0
  15. data/bin/derive_wgs84.rb +85 -0
  16. data/bin/extract_thumbnail.rb +49 -0
  17. data/bin/geohydra +11 -0
  18. data/bin/ingest_arcgis.rb +77 -0
  19. data/bin/ingest_tufts.rb +71 -0
  20. data/bin/loader.rb +296 -0
  21. data/bin/loader_postgis.rb +259 -0
  22. data/bin/seed.rb +78 -0
  23. data/bin/solr_indexer.rb +41 -0
  24. data/bin/sync_geoserver_metadata.rb +168 -0
  25. data/bin/validate_data.rb +59 -0
  26. data/config/.gitignore +1 -0
  27. data/config/boot.rb +17 -0
  28. data/config/database.yml +29 -0
  29. data/config/environments/.gitignore +8 -0
  30. data/config/environments/.gitkeep +0 -0
  31. data/config/environments/example.rb +85 -0
  32. data/config/environments/example_rgeoserver.yml +16 -0
  33. data/config/migrate.rb +11 -0
  34. data/geohydra.gemspec +49 -0
  35. data/lib/geohydra.rb +11 -0
  36. data/lib/geohydra/accession.rb +291 -0
  37. data/lib/geohydra/arcgis_to_iso19139_fc.xsl +364 -0
  38. data/lib/geohydra/gazetteer.csv +36 -0
  39. data/lib/geohydra/gazetteer.rb +73 -0
  40. data/lib/geohydra/geonetwork.rb +156 -0
  41. data/lib/geohydra/mods2ogp.xsl +198 -0
  42. data/lib/geohydra/ogpcleanup.xsl +28 -0
  43. data/lib/geohydra/solr.rb +49 -0
  44. data/lib/geohydra/transform.rb +175 -0
  45. data/lib/geohydra/utils.rb +19 -0
  46. data/lib/geohydra/version.rb +3 -0
  47. data/scripts/correct_collections.rb +93 -0
  48. data/scripts/dbfdump.rb +13 -0
  49. data/scripts/extract.xsl +35 -0
  50. data/scripts/geoserver_createdb.sh +6 -0
  51. data/scripts/geoserver_createdb.sql +29 -0
  52. data/scripts/geowebcache_tops.rb +22 -0
  53. data/scripts/ingest_mods.rb +23 -0
  54. data/scripts/iso2html/displayElement.xsl +52 -0
  55. data/scripts/iso2html/elements-ISO.xml +665 -0
  56. data/scripts/iso2html/headers-ISO.xml +16 -0
  57. data/scripts/iso2html/printFormatted.xsl +227 -0
  58. data/scripts/iso2html/printtextlines.xsl +171 -0
  59. data/scripts/iso2html/xml-to-html-ISO.xsl +273 -0
  60. data/scripts/iso2html/xml-to-text-ISO.xsl +148 -0
  61. data/scripts/load_stage_data.rb +13 -0
  62. data/scripts/nextgis_iso19115.xsl +553 -0
  63. data/scripts/replicate_db.sh +7 -0
  64. data/scripts/report_stage.rb +5 -0
  65. data/solr/dev.html +19 -0
  66. data/solr/dlss-dev-drh-geo/conf/schema.xml +54 -0
  67. data/solr/dlss-dev-drh-geo/conf/solrconfig.xml +156 -0
  68. data/solr/dlss-dev-drh-geo/deploy.sh +15 -0
  69. data/solr/dlss-dev-drh-geo/example_doc.xml +25 -0
  70. data/solr/dlss-dev-drh-geo/test.sh +14 -0
  71. data/solr/ogp-dev/conf/admin-extra.html +24 -0
  72. data/solr/ogp-dev/conf/admin-extra.menu-bottom.html +25 -0
  73. data/solr/ogp-dev/conf/admin-extra.menu-top.html +25 -0
  74. data/solr/ogp-dev/conf/currency.xml +67 -0
  75. data/solr/ogp-dev/conf/elevate.xml +38 -0
  76. data/solr/ogp-dev/conf/lang/contractions_ca.txt +8 -0
  77. data/solr/ogp-dev/conf/lang/contractions_fr.txt +15 -0
  78. data/solr/ogp-dev/conf/lang/contractions_ga.txt +5 -0
  79. data/solr/ogp-dev/conf/lang/contractions_it.txt +23 -0
  80. data/solr/ogp-dev/conf/lang/hyphenations_ga.txt +5 -0
  81. data/solr/ogp-dev/conf/lang/stemdict_nl.txt +6 -0
  82. data/solr/ogp-dev/conf/lang/stoptags_ja.txt +420 -0
  83. data/solr/ogp-dev/conf/lang/stopwords_ar.txt +125 -0
  84. data/solr/ogp-dev/conf/lang/stopwords_bg.txt +193 -0
  85. data/solr/ogp-dev/conf/lang/stopwords_ca.txt +220 -0
  86. data/solr/ogp-dev/conf/lang/stopwords_cz.txt +172 -0
  87. data/solr/ogp-dev/conf/lang/stopwords_da.txt +108 -0
  88. data/solr/ogp-dev/conf/lang/stopwords_de.txt +292 -0
  89. data/solr/ogp-dev/conf/lang/stopwords_el.txt +78 -0
  90. data/solr/ogp-dev/conf/lang/stopwords_en.txt +54 -0
  91. data/solr/ogp-dev/conf/lang/stopwords_es.txt +354 -0
  92. data/solr/ogp-dev/conf/lang/stopwords_eu.txt +99 -0
  93. data/solr/ogp-dev/conf/lang/stopwords_fa.txt +313 -0
  94. data/solr/ogp-dev/conf/lang/stopwords_fi.txt +95 -0
  95. data/solr/ogp-dev/conf/lang/stopwords_fr.txt +184 -0
  96. data/solr/ogp-dev/conf/lang/stopwords_ga.txt +110 -0
  97. data/solr/ogp-dev/conf/lang/stopwords_gl.txt +161 -0
  98. data/solr/ogp-dev/conf/lang/stopwords_hi.txt +235 -0
  99. data/solr/ogp-dev/conf/lang/stopwords_hu.txt +209 -0
  100. data/solr/ogp-dev/conf/lang/stopwords_hy.txt +46 -0
  101. data/solr/ogp-dev/conf/lang/stopwords_id.txt +359 -0
  102. data/solr/ogp-dev/conf/lang/stopwords_it.txt +301 -0
  103. data/solr/ogp-dev/conf/lang/stopwords_ja.txt +127 -0
  104. data/solr/ogp-dev/conf/lang/stopwords_lv.txt +172 -0
  105. data/solr/ogp-dev/conf/lang/stopwords_nl.txt +117 -0
  106. data/solr/ogp-dev/conf/lang/stopwords_no.txt +192 -0
  107. data/solr/ogp-dev/conf/lang/stopwords_pt.txt +251 -0
  108. data/solr/ogp-dev/conf/lang/stopwords_ro.txt +233 -0
  109. data/solr/ogp-dev/conf/lang/stopwords_ru.txt +241 -0
  110. data/solr/ogp-dev/conf/lang/stopwords_sv.txt +131 -0
  111. data/solr/ogp-dev/conf/lang/stopwords_th.txt +119 -0
  112. data/solr/ogp-dev/conf/lang/stopwords_tr.txt +212 -0
  113. data/solr/ogp-dev/conf/lang/userdict_ja.txt +29 -0
  114. data/solr/ogp-dev/conf/mapping-FoldToASCII.txt +3813 -0
  115. data/solr/ogp-dev/conf/mapping-ISOLatin1Accent.txt +246 -0
  116. data/solr/ogp-dev/conf/protwords.txt +21 -0
  117. data/solr/ogp-dev/conf/schema.xml +1362 -0
  118. data/solr/ogp-dev/conf/scripts.conf +24 -0
  119. data/solr/ogp-dev/conf/solrconfig.xml +1816 -0
  120. data/solr/ogp-dev/conf/spellings.txt +2 -0
  121. data/solr/ogp-dev/conf/stopwords.txt +14 -0
  122. data/solr/ogp-dev/conf/synonyms.txt +29 -0
  123. data/solr/ogp-dev/conf/synonymsIso.txt +158 -0
  124. data/solr/ogp-dev/conf/synonymsLcsh.txt +120 -0
  125. data/solr/ogp-dev/conf/synonymsState.txt +105 -0
  126. data/solr/ogp-dev/conf/update-script.js +53 -0
  127. data/solr/ogp-dev/conf/velocity/VM_global_library.vm +170 -0
  128. data/solr/ogp-dev/conf/velocity/browse.vm +50 -0
  129. data/solr/ogp-dev/conf/velocity/cluster.vm +9 -0
  130. data/solr/ogp-dev/conf/velocity/clusterResults.vm +12 -0
  131. data/solr/ogp-dev/conf/velocity/debug.vm +17 -0
  132. data/solr/ogp-dev/conf/velocity/did_you_mean.vm +4 -0
  133. data/solr/ogp-dev/conf/velocity/facet_fields.vm +15 -0
  134. data/solr/ogp-dev/conf/velocity/facet_pivot.vm +3 -0
  135. data/solr/ogp-dev/conf/velocity/facet_queries.vm +3 -0
  136. data/solr/ogp-dev/conf/velocity/facet_ranges.vm +15 -0
  137. data/solr/ogp-dev/conf/velocity/facets.vm +5 -0
  138. data/solr/ogp-dev/conf/velocity/footer.vm +17 -0
  139. data/solr/ogp-dev/conf/velocity/head.vm +32 -0
  140. data/solr/ogp-dev/conf/velocity/header.vm +3 -0
  141. data/solr/ogp-dev/conf/velocity/hit.vm +11 -0
  142. data/solr/ogp-dev/conf/velocity/hitGrouped.vm +24 -0
  143. data/solr/ogp-dev/conf/velocity/join-doc.vm +4 -0
  144. data/solr/ogp-dev/conf/velocity/jquery.autocomplete.css +48 -0
  145. data/solr/ogp-dev/conf/velocity/jquery.autocomplete.js +763 -0
  146. data/solr/ogp-dev/conf/velocity/layout.vm +20 -0
  147. data/solr/ogp-dev/conf/velocity/main.css +208 -0
  148. data/solr/ogp-dev/conf/velocity/product-doc.vm +27 -0
  149. data/solr/ogp-dev/conf/velocity/query.vm +42 -0
  150. data/solr/ogp-dev/conf/velocity/queryGroup.vm +19 -0
  151. data/solr/ogp-dev/conf/velocity/querySpatial.vm +40 -0
  152. data/solr/ogp-dev/conf/velocity/richtext-doc.vm +114 -0
  153. data/solr/ogp-dev/conf/velocity/suggest.vm +3 -0
  154. data/solr/ogp-dev/conf/velocity/tabs.vm +6 -0
  155. data/solr/ogp-dev/conf/xslt/example.xsl +132 -0
  156. data/solr/ogp-dev/conf/xslt/example_atom.xsl +67 -0
  157. data/solr/ogp-dev/conf/xslt/example_rss.xsl +66 -0
  158. data/solr/ogp-dev/conf/xslt/luke.xsl +337 -0
  159. data/solr/ogp-dev/conf/xslt/updateXml.xsl +70 -0
  160. data/solr/ogp-dev/deploy.sh +15 -0
  161. data/solr/ogp-dev/purge.sh +9 -0
  162. data/solr/ogp-dev/upload.sh +17 -0
  163. data/solr/ogp-test/conf/admin-extra.html +24 -0
  164. data/solr/ogp-test/conf/admin-extra.menu-bottom.html +25 -0
  165. data/solr/ogp-test/conf/admin-extra.menu-top.html +25 -0
  166. data/solr/ogp-test/conf/currency.xml +67 -0
  167. data/solr/ogp-test/conf/elevate.xml +38 -0
  168. data/solr/ogp-test/conf/lang/contractions_ca.txt +8 -0
  169. data/solr/ogp-test/conf/lang/contractions_fr.txt +15 -0
  170. data/solr/ogp-test/conf/lang/contractions_ga.txt +5 -0
  171. data/solr/ogp-test/conf/lang/contractions_it.txt +23 -0
  172. data/solr/ogp-test/conf/lang/hyphenations_ga.txt +5 -0
  173. data/solr/ogp-test/conf/lang/stemdict_nl.txt +6 -0
  174. data/solr/ogp-test/conf/lang/stoptags_ja.txt +420 -0
  175. data/solr/ogp-test/conf/lang/stopwords_ar.txt +125 -0
  176. data/solr/ogp-test/conf/lang/stopwords_bg.txt +193 -0
  177. data/solr/ogp-test/conf/lang/stopwords_ca.txt +220 -0
  178. data/solr/ogp-test/conf/lang/stopwords_cz.txt +172 -0
  179. data/solr/ogp-test/conf/lang/stopwords_da.txt +108 -0
  180. data/solr/ogp-test/conf/lang/stopwords_de.txt +292 -0
  181. data/solr/ogp-test/conf/lang/stopwords_el.txt +78 -0
  182. data/solr/ogp-test/conf/lang/stopwords_en.txt +54 -0
  183. data/solr/ogp-test/conf/lang/stopwords_es.txt +354 -0
  184. data/solr/ogp-test/conf/lang/stopwords_eu.txt +99 -0
  185. data/solr/ogp-test/conf/lang/stopwords_fa.txt +313 -0
  186. data/solr/ogp-test/conf/lang/stopwords_fi.txt +95 -0
  187. data/solr/ogp-test/conf/lang/stopwords_fr.txt +184 -0
  188. data/solr/ogp-test/conf/lang/stopwords_ga.txt +110 -0
  189. data/solr/ogp-test/conf/lang/stopwords_gl.txt +161 -0
  190. data/solr/ogp-test/conf/lang/stopwords_hi.txt +235 -0
  191. data/solr/ogp-test/conf/lang/stopwords_hu.txt +209 -0
  192. data/solr/ogp-test/conf/lang/stopwords_hy.txt +46 -0
  193. data/solr/ogp-test/conf/lang/stopwords_id.txt +359 -0
  194. data/solr/ogp-test/conf/lang/stopwords_it.txt +301 -0
  195. data/solr/ogp-test/conf/lang/stopwords_ja.txt +127 -0
  196. data/solr/ogp-test/conf/lang/stopwords_lv.txt +172 -0
  197. data/solr/ogp-test/conf/lang/stopwords_nl.txt +117 -0
  198. data/solr/ogp-test/conf/lang/stopwords_no.txt +192 -0
  199. data/solr/ogp-test/conf/lang/stopwords_pt.txt +251 -0
  200. data/solr/ogp-test/conf/lang/stopwords_ro.txt +233 -0
  201. data/solr/ogp-test/conf/lang/stopwords_ru.txt +241 -0
  202. data/solr/ogp-test/conf/lang/stopwords_sv.txt +131 -0
  203. data/solr/ogp-test/conf/lang/stopwords_th.txt +119 -0
  204. data/solr/ogp-test/conf/lang/stopwords_tr.txt +212 -0
  205. data/solr/ogp-test/conf/lang/userdict_ja.txt +29 -0
  206. data/solr/ogp-test/conf/mapping-FoldToASCII.txt +3813 -0
  207. data/solr/ogp-test/conf/mapping-ISOLatin1Accent.txt +246 -0
  208. data/solr/ogp-test/conf/protwords.txt +21 -0
  209. data/solr/ogp-test/conf/schema.xml +1362 -0
  210. data/solr/ogp-test/conf/scripts.conf +24 -0
  211. data/solr/ogp-test/conf/solrconfig.xml +1816 -0
  212. data/solr/ogp-test/conf/spellings.txt +2 -0
  213. data/solr/ogp-test/conf/stopwords.txt +14 -0
  214. data/solr/ogp-test/conf/synonyms.txt +29 -0
  215. data/solr/ogp-test/conf/synonymsIso.txt +158 -0
  216. data/solr/ogp-test/conf/synonymsLcsh.txt +120 -0
  217. data/solr/ogp-test/conf/synonymsState.txt +105 -0
  218. data/solr/ogp-test/conf/update-script.js +53 -0
  219. data/solr/ogp-test/conf/velocity/VM_global_library.vm +170 -0
  220. data/solr/ogp-test/conf/velocity/browse.vm +50 -0
  221. data/solr/ogp-test/conf/velocity/cluster.vm +9 -0
  222. data/solr/ogp-test/conf/velocity/clusterResults.vm +12 -0
  223. data/solr/ogp-test/conf/velocity/debug.vm +17 -0
  224. data/solr/ogp-test/conf/velocity/did_you_mean.vm +4 -0
  225. data/solr/ogp-test/conf/velocity/facet_fields.vm +15 -0
  226. data/solr/ogp-test/conf/velocity/facet_pivot.vm +3 -0
  227. data/solr/ogp-test/conf/velocity/facet_queries.vm +3 -0
  228. data/solr/ogp-test/conf/velocity/facet_ranges.vm +15 -0
  229. data/solr/ogp-test/conf/velocity/facets.vm +5 -0
  230. data/solr/ogp-test/conf/velocity/footer.vm +17 -0
  231. data/solr/ogp-test/conf/velocity/head.vm +32 -0
  232. data/solr/ogp-test/conf/velocity/header.vm +3 -0
  233. data/solr/ogp-test/conf/velocity/hit.vm +11 -0
  234. data/solr/ogp-test/conf/velocity/hitGrouped.vm +24 -0
  235. data/solr/ogp-test/conf/velocity/join-doc.vm +4 -0
  236. data/solr/ogp-test/conf/velocity/jquery.autocomplete.css +48 -0
  237. data/solr/ogp-test/conf/velocity/jquery.autocomplete.js +763 -0
  238. data/solr/ogp-test/conf/velocity/layout.vm +20 -0
  239. data/solr/ogp-test/conf/velocity/main.css +208 -0
  240. data/solr/ogp-test/conf/velocity/product-doc.vm +27 -0
  241. data/solr/ogp-test/conf/velocity/query.vm +42 -0
  242. data/solr/ogp-test/conf/velocity/queryGroup.vm +19 -0
  243. data/solr/ogp-test/conf/velocity/querySpatial.vm +40 -0
  244. data/solr/ogp-test/conf/velocity/richtext-doc.vm +114 -0
  245. data/solr/ogp-test/conf/velocity/suggest.vm +3 -0
  246. data/solr/ogp-test/conf/velocity/tabs.vm +6 -0
  247. data/solr/ogp-test/conf/xslt/example.xsl +132 -0
  248. data/solr/ogp-test/conf/xslt/example_atom.xsl +67 -0
  249. data/solr/ogp-test/conf/xslt/example_rss.xsl +66 -0
  250. data/solr/ogp-test/conf/xslt/luke.xsl +337 -0
  251. data/solr/ogp-test/conf/xslt/updateXml.xsl +70 -0
  252. data/solr/ogp-test/deploy.sh +15 -0
  253. data/solr/ogp-test/purge.sh +9 -0
  254. data/solr/ogp-test/upload.sh +17 -0
  255. data/solr/ogp/conf/admin-extra.html +24 -0
  256. data/solr/ogp/conf/admin-extra.menu-bottom.html +25 -0
  257. data/solr/ogp/conf/admin-extra.menu-top.html +25 -0
  258. data/solr/ogp/conf/currency.xml +67 -0
  259. data/solr/ogp/conf/elevate.xml +38 -0
  260. data/solr/ogp/conf/lang/contractions_ca.txt +8 -0
  261. data/solr/ogp/conf/lang/contractions_fr.txt +15 -0
  262. data/solr/ogp/conf/lang/contractions_ga.txt +5 -0
  263. data/solr/ogp/conf/lang/contractions_it.txt +23 -0
  264. data/solr/ogp/conf/lang/hyphenations_ga.txt +5 -0
  265. data/solr/ogp/conf/lang/stemdict_nl.txt +6 -0
  266. data/solr/ogp/conf/lang/stoptags_ja.txt +420 -0
  267. data/solr/ogp/conf/lang/stopwords_ar.txt +125 -0
  268. data/solr/ogp/conf/lang/stopwords_bg.txt +193 -0
  269. data/solr/ogp/conf/lang/stopwords_ca.txt +220 -0
  270. data/solr/ogp/conf/lang/stopwords_cz.txt +172 -0
  271. data/solr/ogp/conf/lang/stopwords_da.txt +108 -0
  272. data/solr/ogp/conf/lang/stopwords_de.txt +292 -0
  273. data/solr/ogp/conf/lang/stopwords_el.txt +78 -0
  274. data/solr/ogp/conf/lang/stopwords_en.txt +54 -0
  275. data/solr/ogp/conf/lang/stopwords_es.txt +354 -0
  276. data/solr/ogp/conf/lang/stopwords_eu.txt +99 -0
  277. data/solr/ogp/conf/lang/stopwords_fa.txt +313 -0
  278. data/solr/ogp/conf/lang/stopwords_fi.txt +95 -0
  279. data/solr/ogp/conf/lang/stopwords_fr.txt +184 -0
  280. data/solr/ogp/conf/lang/stopwords_ga.txt +110 -0
  281. data/solr/ogp/conf/lang/stopwords_gl.txt +161 -0
  282. data/solr/ogp/conf/lang/stopwords_hi.txt +235 -0
  283. data/solr/ogp/conf/lang/stopwords_hu.txt +209 -0
  284. data/solr/ogp/conf/lang/stopwords_hy.txt +46 -0
  285. data/solr/ogp/conf/lang/stopwords_id.txt +359 -0
  286. data/solr/ogp/conf/lang/stopwords_it.txt +301 -0
  287. data/solr/ogp/conf/lang/stopwords_ja.txt +127 -0
  288. data/solr/ogp/conf/lang/stopwords_lv.txt +172 -0
  289. data/solr/ogp/conf/lang/stopwords_nl.txt +117 -0
  290. data/solr/ogp/conf/lang/stopwords_no.txt +192 -0
  291. data/solr/ogp/conf/lang/stopwords_pt.txt +251 -0
  292. data/solr/ogp/conf/lang/stopwords_ro.txt +233 -0
  293. data/solr/ogp/conf/lang/stopwords_ru.txt +241 -0
  294. data/solr/ogp/conf/lang/stopwords_sv.txt +131 -0
  295. data/solr/ogp/conf/lang/stopwords_th.txt +119 -0
  296. data/solr/ogp/conf/lang/stopwords_tr.txt +212 -0
  297. data/solr/ogp/conf/lang/userdict_ja.txt +29 -0
  298. data/solr/ogp/conf/mapping-FoldToASCII.txt +3813 -0
  299. data/solr/ogp/conf/mapping-ISOLatin1Accent.txt +246 -0
  300. data/solr/ogp/conf/protwords.txt +21 -0
  301. data/solr/ogp/conf/schema.xml +1362 -0
  302. data/solr/ogp/conf/scripts.conf +24 -0
  303. data/solr/ogp/conf/solrconfig.xml +1816 -0
  304. data/solr/ogp/conf/spellings.txt +2 -0
  305. data/solr/ogp/conf/stopwords.txt +14 -0
  306. data/solr/ogp/conf/synonyms.txt +29 -0
  307. data/solr/ogp/conf/synonymsIso.txt +158 -0
  308. data/solr/ogp/conf/synonymsLcsh.txt +120 -0
  309. data/solr/ogp/conf/synonymsState.txt +105 -0
  310. data/solr/ogp/conf/update-script.js +53 -0
  311. data/solr/ogp/conf/velocity/VM_global_library.vm +170 -0
  312. data/solr/ogp/conf/velocity/browse.vm +50 -0
  313. data/solr/ogp/conf/velocity/cluster.vm +9 -0
  314. data/solr/ogp/conf/velocity/clusterResults.vm +12 -0
  315. data/solr/ogp/conf/velocity/debug.vm +17 -0
  316. data/solr/ogp/conf/velocity/did_you_mean.vm +4 -0
  317. data/solr/ogp/conf/velocity/facet_fields.vm +15 -0
  318. data/solr/ogp/conf/velocity/facet_pivot.vm +3 -0
  319. data/solr/ogp/conf/velocity/facet_queries.vm +3 -0
  320. data/solr/ogp/conf/velocity/facet_ranges.vm +15 -0
  321. data/solr/ogp/conf/velocity/facets.vm +5 -0
  322. data/solr/ogp/conf/velocity/footer.vm +17 -0
  323. data/solr/ogp/conf/velocity/head.vm +32 -0
  324. data/solr/ogp/conf/velocity/header.vm +3 -0
  325. data/solr/ogp/conf/velocity/hit.vm +11 -0
  326. data/solr/ogp/conf/velocity/hitGrouped.vm +24 -0
  327. data/solr/ogp/conf/velocity/join-doc.vm +4 -0
  328. data/solr/ogp/conf/velocity/jquery.autocomplete.css +48 -0
  329. data/solr/ogp/conf/velocity/jquery.autocomplete.js +763 -0
  330. data/solr/ogp/conf/velocity/layout.vm +20 -0
  331. data/solr/ogp/conf/velocity/main.css +208 -0
  332. data/solr/ogp/conf/velocity/product-doc.vm +27 -0
  333. data/solr/ogp/conf/velocity/query.vm +42 -0
  334. data/solr/ogp/conf/velocity/queryGroup.vm +19 -0
  335. data/solr/ogp/conf/velocity/querySpatial.vm +40 -0
  336. data/solr/ogp/conf/velocity/richtext-doc.vm +114 -0
  337. data/solr/ogp/conf/velocity/suggest.vm +3 -0
  338. data/solr/ogp/conf/velocity/tabs.vm +6 -0
  339. data/solr/ogp/conf/xslt/example.xsl +132 -0
  340. data/solr/ogp/conf/xslt/example_atom.xsl +67 -0
  341. data/solr/ogp/conf/xslt/example_rss.xsl +66 -0
  342. data/solr/ogp/conf/xslt/luke.xsl +337 -0
  343. data/solr/ogp/conf/xslt/updateXml.xsl +70 -0
  344. data/solr/ogp/deploy.sh +15 -0
  345. data/solr/ogp/upload.sh +16 -0
  346. data/spec/fixtures/bw938nk9584/metadata/geoMetadata.xml +2438 -0
  347. data/spec/fixtures/bw938nk9584/temp/TRIPURA-iso19139-fc.xml +1643 -0
  348. data/spec/fixtures/bw938nk9584/temp/TRIPURA-iso19139.xml +790 -0
  349. data/spec/fixtures/cc142xj8436/metadata/geoMetadata.xml +2029 -0
  350. data/spec/fixtures/cc142xj8436/temp/HARYANA-iso19139-fc.xml +1174 -0
  351. data/spec/fixtures/cc142xj8436/temp/HARYANA-iso19139.xml +850 -0
  352. data/spec/fixtures/cg716wc7949/metadata/geoMetadata.xml +1815 -0
  353. data/spec/fixtures/cg716wc7949/temp/metadata.iso19139-fc.xml +958 -0
  354. data/spec/fixtures/cg716wc7949/temp/metadata.iso19139.xml +852 -0
  355. data/spec/fixtures/cm007pv9601/metadata/geoMetadata.xml +1092 -0
  356. data/spec/fixtures/cm007pv9601/temp/MEGHALAYA-iso19139-fc.xml +379 -0
  357. data/spec/fixtures/cm007pv9601/temp/MEGHALAYA-iso19139.xml +708 -0
  358. data/spec/fixtures/cp055nb0189/metadata/geoMetadata.xml +1820 -0
  359. data/spec/fixtures/cp055nb0189/temp/metadata.iso19139-fc.xml +955 -0
  360. data/spec/fixtures/cp055nb0189/temp/metadata.iso19139.xml +860 -0
  361. data/spec/fixtures/cs838pw3418/metadata/geoMetadata.xml +912 -0
  362. data/spec/fixtures/cs838pw3418/temp/OIL_GAS_FIELDS-iso19139-fc.xml +177 -0
  363. data/spec/fixtures/cs838pw3418/temp/OIL_GAS_FIELDS-iso19139.xml +730 -0
  364. data/spec/fixtures/dd308sy5843/metadata/geoMetadata.xml +2438 -0
  365. data/spec/fixtures/dd308sy5843/temp/ORISSA-iso19139-fc.xml +1643 -0
  366. data/spec/fixtures/dd308sy5843/temp/ORISSA-iso19139.xml +790 -0
  367. data/spec/fixtures/dd452vk1873/metadata/geoMetadata.xml +1168 -0
  368. data/spec/fixtures/dd452vk1873/temp/metadata.iso19139-fc.xml +175 -0
  369. data/spec/fixtures/dd452vk1873/temp/metadata.iso19139.xml +988 -0
  370. data/spec/fixtures/dg850pt1796/metadata/geoMetadata.xml +1411 -0
  371. data/spec/fixtures/dg850pt1796/temp/STATE1951-iso19139-fc.xml +519 -0
  372. data/spec/fixtures/dg850pt1796/temp/STATE1951-iso19139.xml +887 -0
  373. data/spec/fixtures/dn744tf5427/metadata/geoMetadata.xml +7403 -0
  374. data/spec/fixtures/dn744tf5427/temp/DISTRICT1991-iso19139-fc.xml +6539 -0
  375. data/spec/fixtures/dn744tf5427/temp/DISTRICT1991-iso19139.xml +859 -0
  376. data/spec/fixtures/dq603nz8402/metadata/geoMetadata.xml +8485 -0
  377. data/spec/fixtures/dq603nz8402/temp/STATE2001-iso19139-fc.xml +7595 -0
  378. data/spec/fixtures/dq603nz8402/temp/STATE2001-iso19139.xml +885 -0
  379. data/spec/fixtures/dv609zt4699/metadata/geoMetadata.xml +3865 -0
  380. data/spec/fixtures/dv609zt4699/temp/ASSAM-iso19139-fc.xml +3075 -0
  381. data/spec/fixtures/dv609zt4699/temp/ASSAM-iso19139.xml +785 -0
  382. data/spec/fixtures/dz222hw0585/metadata/geoMetadata.xml +2057 -0
  383. data/spec/fixtures/dz222hw0585/temp/PUNJAB-iso19139-fc.xml +1203 -0
  384. data/spec/fixtures/dz222hw0585/temp/PUNJAB-iso19139.xml +849 -0
  385. data/spec/fixtures/fd673qb9705/metadata/geoMetadata.xml +4059 -0
  386. data/spec/fixtures/fd673qb9705/temp/STATE1971-iso19139-fc.xml +3159 -0
  387. data/spec/fixtures/fd673qb9705/temp/STATE1971-iso19139.xml +895 -0
  388. data/spec/fixtures/fg451wp8917/metadata/geoMetadata.xml +2435 -0
  389. data/spec/fixtures/fg451wp8917/temp/SIKKIM-iso19139-fc.xml +1643 -0
  390. data/spec/fixtures/fg451wp8917/temp/SIKKIM-iso19139.xml +787 -0
  391. data/spec/fixtures/fh247yz0156/metadata/geoMetadata.xml +2437 -0
  392. data/spec/fixtures/fh247yz0156/temp/RAJASTHAN-iso19139-fc.xml +1643 -0
  393. data/spec/fixtures/fh247yz0156/temp/RAJASTHAN-iso19139.xml +789 -0
  394. data/spec/fixtures/fs487vd1465/metadata/geoMetadata.xml +3896 -0
  395. data/spec/fixtures/fs487vd1465/temp/CHHATTISGARH-iso19139-fc.xml +3075 -0
  396. data/spec/fixtures/fs487vd1465/temp/CHHATTISGARH-iso19139.xml +816 -0
  397. data/spec/fixtures/fs591bn3317/metadata/geoMetadata.xml +2435 -0
  398. data/spec/fixtures/fs591bn3317/temp/HIMACHAL_PRADESH-iso19139-fc.xml +1643 -0
  399. data/spec/fixtures/fs591bn3317/temp/HIMACHAL_PRADESH-iso19139.xml +787 -0
  400. data/spec/fixtures/fw920bc5473/metadata/geoMetadata.xml +1056 -0
  401. data/spec/fixtures/fw920bc5473/temp/PLSS_TWN-iso19139-fc.xml +327 -0
  402. data/spec/fixtures/fw920bc5473/temp/PLSS_TWN-iso19139.xml +724 -0
  403. data/spec/fixtures/gj831wj3625/metadata/geoMetadata.xml +1735 -0
  404. data/spec/fixtures/gj831wj3625/temp/metadata.iso19139-fc.xml +940 -0
  405. data/spec/fixtures/gj831wj3625/temp/metadata.iso19139.xml +790 -0
  406. data/spec/fixtures/gp075nv3265/metadata/geoMetadata.xml +2432 -0
  407. data/spec/fixtures/gp075nv3265/temp/PONDICHERRY-iso19139-fc.xml +1643 -0
  408. data/spec/fixtures/gp075nv3265/temp/PONDICHERRY-iso19139.xml +784 -0
  409. data/spec/fixtures/gv800hj8141/metadata/geoMetadata.xml +2392 -0
  410. data/spec/fixtures/gv800hj8141/temp/BIHAR-iso19139-fc.xml +1643 -0
  411. data/spec/fixtures/gv800hj8141/temp/BIHAR-iso19139.xml +744 -0
  412. data/spec/fixtures/gw520gz6339/metadata/geoMetadata.xml +3899 -0
  413. data/spec/fixtures/gw520gz6339/temp/DADRA_NAGAR_HAVELI_PT-iso19139-fc.xml +3075 -0
  414. data/spec/fixtures/gw520gz6339/temp/DADRA_NAGAR_HAVELI_PT-iso19139.xml +819 -0
  415. data/spec/fixtures/gy054hz1045/metadata/geoMetadata.xml +2418 -0
  416. data/spec/fixtures/gy054hz1045/temp/HARYANA-iso19139-fc.xml +1633 -0
  417. data/spec/fixtures/gy054hz1045/temp/HARYANA-iso19139.xml +780 -0
  418. data/spec/fixtures/gz352mw6982/metadata/geoMetadata.xml +1772 -0
  419. data/spec/fixtures/gz352mw6982/temp/metadata.iso19139-fc.xml +940 -0
  420. data/spec/fixtures/gz352mw6982/temp/metadata.iso19139.xml +827 -0
  421. data/spec/fixtures/hb489vm9892/metadata/geoMetadata.xml +3663 -0
  422. data/spec/fixtures/hb489vm9892/temp/DISTRICT1981-iso19139-fc.xml +2799 -0
  423. data/spec/fixtures/hb489vm9892/temp/DISTRICT1981-iso19139.xml +859 -0
  424. data/spec/fixtures/hw125dq0418/metadata/geoMetadata.xml +2448 -0
  425. data/spec/fixtures/hw125dq0418/temp/DELHI-iso19139-fc.xml +1599 -0
  426. data/spec/fixtures/hw125dq0418/temp/DELHI-iso19139.xml +844 -0
  427. data/spec/fixtures/hw892mn4587/metadata/geoMetadata.xml +2561 -0
  428. data/spec/fixtures/hw892mn4587/temp/KERALA-iso19139-fc.xml +1763 -0
  429. data/spec/fixtures/hw892mn4587/temp/KERALA-iso19139.xml +793 -0
  430. data/spec/fixtures/jb371hz3868/metadata/geoMetadata.xml +3129 -0
  431. data/spec/fixtures/jb371hz3868/temp/INCOME-iso19139-fc.xml +2263 -0
  432. data/spec/fixtures/jb371hz3868/temp/INCOME-iso19139.xml +861 -0
  433. data/spec/fixtures/jc017yk9928/metadata/geoMetadata.xml +2433 -0
  434. data/spec/fixtures/jc017yk9928/temp/KARNATAKA-iso19139-fc.xml +1599 -0
  435. data/spec/fixtures/jc017yk9928/temp/KARNATAKA-iso19139.xml +829 -0
  436. data/spec/fixtures/jf841ys4828/metadata/geoMetadata.xml +2156 -0
  437. data/spec/fixtures/jf841ys4828/temp/ANDHRA_PRADESH-iso19139-fc.xml +1247 -0
  438. data/spec/fixtures/jf841ys4828/temp/ANDHRA_PRADESH-iso19139.xml +904 -0
  439. data/spec/fixtures/jh802mp2160/metadata/geoMetadata.xml +3917 -0
  440. data/spec/fixtures/jh802mp2160/temp/DELHI_PT-iso19139-fc.xml +3075 -0
  441. data/spec/fixtures/jh802mp2160/temp/DELHI_PT-iso19139.xml +837 -0
  442. data/spec/fixtures/jj806fc3801/metadata/geoMetadata.xml +1235 -0
  443. data/spec/fixtures/jj806fc3801/temp/metadata.iso19139-fc.xml +173 -0
  444. data/spec/fixtures/jj806fc3801/temp/metadata.iso19139.xml +1057 -0
  445. data/spec/fixtures/jq835yn7161/metadata/geoMetadata.xml +2060 -0
  446. data/spec/fixtures/jq835yn7161/temp/HIMACHAL_PRADESH-iso19139-fc.xml +1203 -0
  447. data/spec/fixtures/jq835yn7161/temp/HIMACHAL_PRADESH-iso19139.xml +852 -0
  448. data/spec/fixtures/jr455pt6676/metadata/geoMetadata.xml +2045 -0
  449. data/spec/fixtures/jr455pt6676/temp/TAMILNADU-iso19139-fc.xml +1203 -0
  450. data/spec/fixtures/jr455pt6676/temp/TAMILNADU-iso19139.xml +837 -0
  451. data/spec/fixtures/js637zp2537/metadata/geoMetadata.xml +3894 -0
  452. data/spec/fixtures/js637zp2537/temp/BIHAR-iso19139-fc.xml +3075 -0
  453. data/spec/fixtures/js637zp2537/temp/BIHAR-iso19139.xml +814 -0
  454. data/spec/fixtures/jv502wg9611/metadata/geoMetadata.xml +2052 -0
  455. data/spec/fixtures/jv502wg9611/temp/GOA-iso19139-fc.xml +1203 -0
  456. data/spec/fixtures/jv502wg9611/temp/GOA-iso19139.xml +844 -0
  457. data/spec/fixtures/jw462ck6560/metadata/geoMetadata.xml +2442 -0
  458. data/spec/fixtures/jw462ck6560/temp/JAMMU_KASHMIR-iso19139-fc.xml +1643 -0
  459. data/spec/fixtures/jw462ck6560/temp/JAMMU_KASHMIR-iso19139.xml +794 -0
  460. data/spec/fixtures/kj800fb6273/metadata/geoMetadata.xml +3300 -0
  461. data/spec/fixtures/kj800fb6273/temp/STATE2011-iso19139-fc.xml +2403 -0
  462. data/spec/fixtures/kj800fb6273/temp/STATE2011-iso19139.xml +892 -0
  463. data/spec/fixtures/km504zq3948/metadata/geoMetadata.xml +3862 -0
  464. data/spec/fixtures/km504zq3948/temp/HIMACHAL_PRADESH-iso19139-fc.xml +3075 -0
  465. data/spec/fixtures/km504zq3948/temp/HIMACHAL_PRADESH-iso19139.xml +782 -0
  466. data/spec/fixtures/ks297fy1411/metadata/geoMetadata.xml +1043 -0
  467. data/spec/fixtures/ks297fy1411/temp/OFFSH_BLOCKS-iso19139-fc.xml +350 -0
  468. data/spec/fixtures/ks297fy1411/temp/OFFSH_BLOCKS-iso19139.xml +688 -0
  469. data/spec/fixtures/md358hy5049/metadata/geoMetadata.xml +2424 -0
  470. data/spec/fixtures/md358hy5049/temp/MIZORAM-iso19139-fc.xml +1570 -0
  471. data/spec/fixtures/md358hy5049/temp/MIZORAM-iso19139.xml +849 -0
  472. data/spec/fixtures/mg745bq0193/metadata/geoMetadata.xml +2183 -0
  473. data/spec/fixtures/mg745bq0193/temp/MADHYA_PRADESH-iso19139-fc.xml +1383 -0
  474. data/spec/fixtures/mg745bq0193/temp/MADHYA_PRADESH-iso19139.xml +795 -0
  475. data/spec/fixtures/mh187yx3536/metadata/geoMetadata.xml +2447 -0
  476. data/spec/fixtures/mh187yx3536/temp/WEST_BENGAL-iso19139-fc.xml +1643 -0
  477. data/spec/fixtures/mh187yx3536/temp/WEST_BENGAL-iso19139.xml +799 -0
  478. data/spec/fixtures/mk488yn6694/metadata/geoMetadata.xml +2042 -0
  479. data/spec/fixtures/mk488yn6694/temp/GUJARAT-iso19139-fc.xml +1203 -0
  480. data/spec/fixtures/mk488yn6694/temp/GUJARAT-iso19139.xml +834 -0
  481. data/spec/fixtures/my216kp3008/metadata/geoMetadata.xml +2366 -0
  482. data/spec/fixtures/my216kp3008/temp/DELHI-iso19139-fc.xml +1643 -0
  483. data/spec/fixtures/my216kp3008/temp/DELHI-iso19139.xml +718 -0
  484. data/spec/fixtures/my504nz9827/metadata/geoMetadata.xml +2367 -0
  485. data/spec/fixtures/my504nz9827/temp/JAMMU_KASHMIR-iso19139-fc.xml +1519 -0
  486. data/spec/fixtures/my504nz9827/temp/JAMMU_KASHMIR-iso19139.xml +843 -0
  487. data/spec/fixtures/ng819jm8700/metadata/geoMetadata.xml +2496 -0
  488. data/spec/fixtures/ng819jm8700/temp/MANIPUR-iso19139-fc.xml +1643 -0
  489. data/spec/fixtures/ng819jm8700/temp/MANIPUR-iso19139.xml +848 -0
  490. data/spec/fixtures/np020jq2139/metadata/geoMetadata.xml +1153 -0
  491. data/spec/fixtures/np020jq2139/temp/metadata.iso19139-fc.xml +160 -0
  492. data/spec/fixtures/np020jq2139/temp/metadata.iso19139.xml +988 -0
  493. data/spec/fixtures/ns377mt1608/metadata/geoMetadata.xml +7125 -0
  494. data/spec/fixtures/ns377mt1608/temp/STATE1991-iso19139-fc.xml +6283 -0
  495. data/spec/fixtures/ns377mt1608/temp/STATE1991-iso19139.xml +837 -0
  496. data/spec/fixtures/nw926np8508/metadata/geoMetadata.xml +1143 -0
  497. data/spec/fixtures/nw926np8508/temp/metadata.iso19139-fc.xml +160 -0
  498. data/spec/fixtures/nw926np8508/temp/metadata.iso19139.xml +978 -0
  499. data/spec/fixtures/ny358rm8559/metadata/geoMetadata.xml +2459 -0
  500. data/spec/fixtures/ny358rm8559/temp/TRIPURA-iso19139-fc.xml +1599 -0
  501. data/spec/fixtures/ny358rm8559/temp/TRIPURA-iso19139.xml +855 -0
  502. data/spec/fixtures/nz176rm8192/metadata/geoMetadata.xml +2618 -0
  503. data/spec/fixtures/nz176rm8192/temp/DISTRICT2011-iso19139-fc.xml +1813 -0
  504. data/spec/fixtures/nz176rm8192/temp/DISTRICT2011-iso19139.xml +800 -0
  505. data/spec/fixtures/nz252rq2252/metadata/geoMetadata.xml +2387 -0
  506. data/spec/fixtures/nz252rq2252/temp/UTTAR_PRADESH-iso19139-fc.xml +1599 -0
  507. data/spec/fixtures/nz252rq2252/temp/UTTAR_PRADESH-iso19139.xml +783 -0
  508. data/spec/fixtures/pd902kb3348/metadata/geoMetadata.xml +2414 -0
  509. data/spec/fixtures/pd902kb3348/temp/MADHYA_PRADESH-iso19139-fc.xml +1633 -0
  510. data/spec/fixtures/pd902kb3348/temp/MADHYA_PRADESH-iso19139.xml +776 -0
  511. data/spec/fixtures/pz792fz1776/metadata/geoMetadata.xml +2437 -0
  512. data/spec/fixtures/pz792fz1776/temp/MAHARASHTRA-iso19139-fc.xml +1643 -0
  513. data/spec/fixtures/pz792fz1776/temp/MAHARASHTRA-iso19139.xml +789 -0
  514. data/spec/fixtures/qb767ss4042/metadata/geoMetadata.xml +2045 -0
  515. data/spec/fixtures/qb767ss4042/temp/UTTAR_PRADESH-iso19139-fc.xml +1200 -0
  516. data/spec/fixtures/qb767ss4042/temp/UTTAR_PRADESH-iso19139.xml +840 -0
  517. data/spec/fixtures/qc091qw0570/metadata/geoMetadata.xml +2428 -0
  518. data/spec/fixtures/qc091qw0570/temp/GUJARAT-iso19139-fc.xml +1643 -0
  519. data/spec/fixtures/qc091qw0570/temp/GUJARAT-iso19139.xml +780 -0
  520. data/spec/fixtures/qc652vr7204/metadata/geoMetadata.xml +3877 -0
  521. data/spec/fixtures/qc652vr7204/temp/ANDHRA_PRADESH_PT-iso19139-fc.xml +3075 -0
  522. data/spec/fixtures/qc652vr7204/temp/ANDHRA_PRADESH_PT-iso19139.xml +797 -0
  523. data/spec/fixtures/qk786js7484/metadata/geoMetadata.xml +3267 -0
  524. data/spec/fixtures/qk786js7484/temp/DISTRICT1961-iso19139-fc.xml +2403 -0
  525. data/spec/fixtures/qk786js7484/temp/DISTRICT1961-iso19139.xml +859 -0
  526. data/spec/fixtures/qn676pg6767/metadata/geoMetadata.xml +3861 -0
  527. data/spec/fixtures/qn676pg6767/temp/GOA-iso19139-fc.xml +3075 -0
  528. data/spec/fixtures/qn676pg6767/temp/GOA-iso19139.xml +781 -0
  529. data/spec/fixtures/qr255jh4074/metadata/geoMetadata.xml +3226 -0
  530. data/spec/fixtures/qr255jh4074/temp/LOKSABHA_14-iso19139-fc.xml +2366 -0
  531. data/spec/fixtures/qr255jh4074/temp/LOKSABHA_14-iso19139.xml +855 -0
  532. data/spec/fixtures/qr374kj4827/metadata/geoMetadata.xml +2428 -0
  533. data/spec/fixtures/qr374kj4827/temp/ASSAM-iso19139-fc.xml +1643 -0
  534. data/spec/fixtures/qr374kj4827/temp/ASSAM-iso19139.xml +780 -0
  535. data/spec/fixtures/qy162js1748/metadata/geoMetadata.xml +2440 -0
  536. data/spec/fixtures/qy162js1748/temp/CHHATTISGARH-iso19139-fc.xml +1643 -0
  537. data/spec/fixtures/qy162js1748/temp/CHHATTISGARH-iso19139.xml +792 -0
  538. data/spec/fixtures/rd446vf2633/metadata/geoMetadata.xml +1109 -0
  539. data/spec/fixtures/rd446vf2633/temp/NAGALAND-iso19139-fc.xml +379 -0
  540. data/spec/fixtures/rd446vf2633/temp/NAGALAND-iso19139.xml +725 -0
  541. data/spec/fixtures/rf389hf2983/metadata/geoMetadata.xml +3929 -0
  542. data/spec/fixtures/rf389hf2983/temp/CHHATTISGARH_PT-iso19139-fc.xml +3075 -0
  543. data/spec/fixtures/rf389hf2983/temp/CHHATTISGARH_PT-iso19139.xml +849 -0
  544. data/spec/fixtures/rf859ff4582/metadata/geoMetadata.xml +1251 -0
  545. data/spec/fixtures/rf859ff4582/temp/JHARKHAND-iso19139-fc.xml +459 -0
  546. data/spec/fixtures/rf859ff4582/temp/JHARKHAND-iso19139.xml +787 -0
  547. data/spec/fixtures/rh343ds8931/metadata/geoMetadata.xml +2032 -0
  548. data/spec/fixtures/rh343ds8931/temp/BIHAR-iso19139-fc.xml +1174 -0
  549. data/spec/fixtures/rh343ds8931/temp/BIHAR-iso19139.xml +853 -0
  550. data/spec/fixtures/rn815xk8157/metadata/geoMetadata.xml +2417 -0
  551. data/spec/fixtures/rn815xk8157/temp/SIKKIM-iso19139-fc.xml +1563 -0
  552. data/spec/fixtures/rn815xk8157/temp/SIKKIM-iso19139.xml +849 -0
  553. data/spec/fixtures/rq653sz4470/metadata/geoMetadata.xml +2394 -0
  554. data/spec/fixtures/rq653sz4470/temp/CHHATTISGARH-iso19139-fc.xml +1599 -0
  555. data/spec/fixtures/rq653sz4470/temp/CHHATTISGARH-iso19139.xml +790 -0
  556. data/spec/fixtures/rt625ws6022/metadata/geoMetadata.xml +833 -0
  557. data/spec/fixtures/rt625ws6022/temp/GULF_FAIRWAYS-iso19139-fc.xml +144 -0
  558. data/spec/fixtures/rt625ws6022/temp/GULF_FAIRWAYS-iso19139.xml +684 -0
  559. data/spec/fixtures/sc330vf4259/metadata/geoMetadata.xml +1836 -0
  560. data/spec/fixtures/sc330vf4259/temp/JHARKHAND-iso19139-fc.xml +987 -0
  561. data/spec/fixtures/sc330vf4259/temp/JHARKHAND-iso19139.xml +844 -0
  562. data/spec/fixtures/sq479mx3086/metadata/geoMetadata.xml +1043 -0
  563. data/spec/fixtures/sq479mx3086/temp/OFFSH_PLATF-iso19139-fc.xml +365 -0
  564. data/spec/fixtures/sq479mx3086/temp/OFFSH_PLATF-iso19139.xml +673 -0
  565. data/spec/fixtures/sr686bm4098/metadata/geoMetadata.xml +3896 -0
  566. data/spec/fixtures/sr686bm4098/temp/DAMAN_DIU_PT-iso19139-fc.xml +3075 -0
  567. data/spec/fixtures/sr686bm4098/temp/DAMAN_DIU_PT-iso19139.xml +816 -0
  568. data/spec/fixtures/sv303sh5583/metadata/geoMetadata.xml +2395 -0
  569. data/spec/fixtures/sv303sh5583/temp/ARUNACHAL_PRADESH-iso19139-fc.xml +1555 -0
  570. data/spec/fixtures/sv303sh5583/temp/ARUNACHAL_PRADESH-iso19139.xml +835 -0
  571. data/spec/fixtures/sy319nh8520/metadata/geoMetadata.xml +3925 -0
  572. data/spec/fixtures/sy319nh8520/temp/GUJARAT-iso19139-fc.xml +3075 -0
  573. data/spec/fixtures/sy319nh8520/temp/GUJARAT-iso19139.xml +845 -0
  574. data/spec/fixtures/td363vx2792/metadata/geoMetadata.xml +3964 -0
  575. data/spec/fixtures/td363vx2792/temp/HIMACHAL_PRADESH_PT-iso19139-fc.xml +3075 -0
  576. data/spec/fixtures/td363vx2792/temp/HIMACHAL_PRADESH_PT-iso19139.xml +884 -0
  577. data/spec/fixtures/tf374bd2484/metadata/geoMetadata.xml +1464 -0
  578. data/spec/fixtures/tf374bd2484/temp/DISTRICT1951-iso19139-fc.xml +599 -0
  579. data/spec/fixtures/tf374bd2484/temp/DISTRICT1951-iso19139.xml +860 -0
  580. data/spec/fixtures/tj797mj7877/metadata/geoMetadata.xml +2359 -0
  581. data/spec/fixtures/tj797mj7877/temp/LOKSABHA_15-iso19139-fc.xml +1511 -0
  582. data/spec/fixtures/tj797mj7877/temp/LOKSABHA_15-iso19139.xml +843 -0
  583. data/spec/fixtures/tv060wq5179/metadata/geoMetadata.xml +2046 -0
  584. data/spec/fixtures/tv060wq5179/temp/ASSAM-iso19139-fc.xml +1203 -0
  585. data/spec/fixtures/tv060wq5179/temp/ASSAM-iso19139.xml +838 -0
  586. data/spec/fixtures/tv536bn1915/metadata/geoMetadata.xml +2296 -0
  587. data/spec/fixtures/tv536bn1915/temp/ARUNACHAL_PRADESH-iso19139-fc.xml +1687 -0
  588. data/spec/fixtures/tv536bn1915/temp/ARUNACHAL_PRADESH-iso19139.xml +604 -0
  589. data/spec/fixtures/tz359cc2977/metadata/geoMetadata.xml +1842 -0
  590. data/spec/fixtures/tz359cc2977/temp/MANIPUR-iso19139-fc.xml +987 -0
  591. data/spec/fixtures/tz359cc2977/temp/MANIPUR-iso19139.xml +850 -0
  592. data/spec/fixtures/vb525my6511/metadata/geoMetadata.xml +2026 -0
  593. data/spec/fixtures/vb525my6511/temp/UTTARAKHAND-iso19139-fc.xml +1167 -0
  594. data/spec/fixtures/vb525my6511/temp/UTTARAKHAND-iso19139.xml +854 -0
  595. data/spec/fixtures/vh802fs4240/metadata/geoMetadata.xml +2189 -0
  596. data/spec/fixtures/vh802fs4240/temp/PONDICHERRY-iso19139-fc.xml +1343 -0
  597. data/spec/fixtures/vh802fs4240/temp/PONDICHERRY-iso19139.xml +841 -0
  598. data/spec/fixtures/vk120xn2474/metadata/geoMetadata.xml +1125 -0
  599. data/spec/fixtures/vk120xn2474/temp/PLSS_SEC-iso19139-fc.xml +435 -0
  600. data/spec/fixtures/vk120xn2474/temp/PLSS_SEC-iso19139.xml +685 -0
  601. data/spec/fixtures/vn439bc7316/metadata/geoMetadata.xml +1979 -0
  602. data/spec/fixtures/vn439bc7316/temp/KERALA-iso19139-fc.xml +1131 -0
  603. data/spec/fixtures/vn439bc7316/temp/KERALA-iso19139.xml +843 -0
  604. data/spec/fixtures/vq745jk0695/metadata/geoMetadata.xml +1787 -0
  605. data/spec/fixtures/vq745jk0695/temp/MEGHALAYA-iso19139-fc.xml +987 -0
  606. data/spec/fixtures/vq745jk0695/temp/MEGHALAYA-iso19139.xml +795 -0
  607. data/spec/fixtures/vr593vj7147/metadata/geoMetadata.xml +3895 -0
  608. data/spec/fixtures/vr593vj7147/temp/ANDHRA_PRADESH-iso19139-fc.xml +3075 -0
  609. data/spec/fixtures/vr593vj7147/temp/ANDHRA_PRADESH-iso19139.xml +815 -0
  610. data/spec/fixtures/vw911qb5271/metadata/geoMetadata.xml +8557 -0
  611. data/spec/fixtures/vw911qb5271/temp/DISTRICT2001-iso19139-fc.xml +7683 -0
  612. data/spec/fixtures/vw911qb5271/temp/DISTRICT2001-iso19139.xml +869 -0
  613. data/spec/fixtures/wg680pz0365/metadata/geoMetadata.xml +2475 -0
  614. data/spec/fixtures/wg680pz0365/temp/ANDHRA_PRADESH-iso19139-fc.xml +1599 -0
  615. data/spec/fixtures/wg680pz0365/temp/ANDHRA_PRADESH-iso19139.xml +871 -0
  616. data/spec/fixtures/wg761xn1926/metadata/geoMetadata.xml +3941 -0
  617. data/spec/fixtures/wg761xn1926/temp/HARYANA-iso19139-fc.xml +3075 -0
  618. data/spec/fixtures/wg761xn1926/temp/HARYANA-iso19139.xml +861 -0
  619. data/spec/fixtures/wh870qw1934/metadata/geoMetadata.xml +2428 -0
  620. data/spec/fixtures/wh870qw1934/temp/PUNJAB-iso19139-fc.xml +1643 -0
  621. data/spec/fixtures/wh870qw1934/temp/PUNJAB-iso19139.xml +780 -0
  622. data/spec/fixtures/wk775mm4673/metadata/geoMetadata.xml +2458 -0
  623. data/spec/fixtures/wk775mm4673/temp/MAHARASHTRA-iso19139-fc.xml +1599 -0
  624. data/spec/fixtures/wk775mm4673/temp/MAHARASHTRA-iso19139.xml +854 -0
  625. data/spec/fixtures/ws171yz2165/metadata/geoMetadata.xml +3868 -0
  626. data/spec/fixtures/ws171yz2165/temp/ARUNACHAL_PRADESH_PT-iso19139-fc.xml +3075 -0
  627. data/spec/fixtures/ws171yz2165/temp/ARUNACHAL_PRADESH_PT-iso19139.xml +788 -0
  628. data/spec/fixtures/wt473hz7153/metadata/geoMetadata.xml +3898 -0
  629. data/spec/fixtures/wt473hz7153/temp/CHANDIGARH_PT-iso19139-fc.xml +3075 -0
  630. data/spec/fixtures/wt473hz7153/temp/CHANDIGARH_PT-iso19139.xml +818 -0
  631. data/spec/fixtures/ww217dj0457/metadata/geoMetadata.xml +1205 -0
  632. data/spec/fixtures/ww217dj0457/temp/CO2_PIPE-iso19139-fc.xml +471 -0
  633. data/spec/fixtures/ww217dj0457/temp/CO2_PIPE-iso19139.xml +729 -0
  634. data/spec/fixtures/wy875pk9849/metadata/geoMetadata.xml +3173 -0
  635. data/spec/fixtures/wy875pk9849/temp/STATE1961-iso19139-fc.xml +2279 -0
  636. data/spec/fixtures/wy875pk9849/temp/STATE1961-iso19139.xml +889 -0
  637. data/spec/fixtures/xb018tk2042/metadata/geoMetadata.xml +2335 -0
  638. data/spec/fixtures/xb018tk2042/temp/STATE1981-iso19139-fc.xml +1443 -0
  639. data/spec/fixtures/xb018tk2042/temp/STATE1981-iso19139.xml +887 -0
  640. data/spec/fixtures/xg539vw8586/metadata/geoMetadata.xml +2405 -0
  641. data/spec/fixtures/xg539vw8586/temp/ORISSA-iso19139-fc.xml +1599 -0
  642. data/spec/fixtures/xg539vw8586/temp/ORISSA-iso19139.xml +801 -0
  643. data/spec/fixtures/xv475kp4644/metadata/geoMetadata.xml +3871 -0
  644. data/spec/fixtures/xv475kp4644/temp/ASSAM_PT-iso19139-fc.xml +3075 -0
  645. data/spec/fixtures/xv475kp4644/temp/ASSAM_PT-iso19139.xml +791 -0
  646. data/spec/fixtures/xy096gc2959/metadata/geoMetadata.xml +2432 -0
  647. data/spec/fixtures/xy096gc2959/temp/GOA-iso19139-fc.xml +1643 -0
  648. data/spec/fixtures/xy096gc2959/temp/GOA-iso19139.xml +784 -0
  649. data/spec/fixtures/xz518gz3362/metadata/geoMetadata.xml +2434 -0
  650. data/spec/fixtures/xz518gz3362/temp/UTTARAKHAND-iso19139-fc.xml +1643 -0
  651. data/spec/fixtures/xz518gz3362/temp/UTTARAKHAND-iso19139.xml +786 -0
  652. data/spec/fixtures/yh986wy4737/metadata/geoMetadata.xml +1160 -0
  653. data/spec/fixtures/yh986wy4737/temp/NAGALAND-iso19139-fc.xml +384 -0
  654. data/spec/fixtures/yh986wy4737/temp/NAGALAND-iso19139.xml +771 -0
  655. data/spec/fixtures/yn187fq4474/metadata/geoMetadata.xml +2168 -0
  656. data/spec/fixtures/yn187fq4474/temp/KARNATAKA-iso19139-fc.xml +1379 -0
  657. data/spec/fixtures/yn187fq4474/temp/KARNATAKA-iso19139.xml +784 -0
  658. data/spec/fixtures/yn236mw3250/metadata/geoMetadata.xml +2428 -0
  659. data/spec/fixtures/yn236mw3250/temp/TAMILNADU-iso19139-fc.xml +1643 -0
  660. data/spec/fixtures/yn236mw3250/temp/TAMILNADU-iso19139.xml +780 -0
  661. data/spec/fixtures/yz596nz0112/metadata/geoMetadata.xml +2012 -0
  662. data/spec/fixtures/yz596nz0112/temp/WEST_BENGAL-iso19139-fc.xml +1203 -0
  663. data/spec/fixtures/yz596nz0112/temp/WEST_BENGAL-iso19139.xml +804 -0
  664. data/spec/fixtures/zk596gy7380/metadata/geoMetadata.xml +4147 -0
  665. data/spec/fixtures/zk596gy7380/temp/DISTRICT1971-iso19139-fc.xml +3283 -0
  666. data/spec/fixtures/zk596gy7380/temp/DISTRICT1971-iso19139.xml +859 -0
  667. data/spec/fixtures/zn452hh7431/metadata/geoMetadata.xml +2337 -0
  668. data/spec/fixtures/zn452hh7431/temp/RAJASTHAN-iso19139-fc.xml +1505 -0
  669. data/spec/fixtures/zn452hh7431/temp/RAJASTHAN-iso19139.xml +827 -0
  670. data/spec/fixtures/zt093fw6519/metadata/geoMetadata.xml +2380 -0
  671. data/spec/fixtures/zt093fw6519/temp/MIZORAM-iso19139-fc.xml +1643 -0
  672. data/spec/fixtures/zt093fw6519/temp/MIZORAM-iso19139.xml +732 -0
  673. data/spec/fixtures/zv925hd6723/metadata/geoMetadata.xml +1473 -0
  674. data/spec/fixtures/zv925hd6723/temp/OGWELLS-iso19139-fc.xml +759 -0
  675. data/spec/fixtures/zv925hd6723/temp/OGWELLS-iso19139.xml +709 -0
  676. data/spec/fixtures/zy658cr1728/metadata/geoMetadata.xml +3814 -0
  677. data/spec/fixtures/zy658cr1728/temp/ANDAMAAN_NICOBAR_PT-iso19139-fc.xml +3075 -0
  678. data/spec/fixtures/zy658cr1728/temp/ANDAMAAN_NICOBAR_PT-iso19139.xml +734 -0
  679. data/spec/fixtures/zz943vx1492/metadata/geoMetadata.xml +861 -0
  680. data/spec/fixtures/zz943vx1492/temp/BASINS-iso19139-fc.xml +164 -0
  681. data/spec/fixtures/zz943vx1492/temp/BASINS-iso19139.xml +692 -0
  682. data/spec/integration/solr_spec_notready.rb +51 -0
  683. data/spec/test_net_ssh_krb.rb +21 -0
  684. data/spec/unit/gazetteer_spec.rb +100 -0
  685. data/spec/unit/transform_spec.rb +40 -0
  686. data/spec/unit/utils_spec.rb +32 -0
  687. metadata +1491 -0
@@ -0,0 +1,212 @@
1
+ # Turkish stopwords from LUCENE-559
2
+ # merged with the list from "Information Retrieval on Turkish Texts"
3
+ # (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
4
+ acaba
5
+ altmış
6
+ altı
7
+ ama
8
+ ancak
9
+ arada
10
+ aslında
11
+ ayrıca
12
+ bana
13
+ bazı
14
+ belki
15
+ ben
16
+ benden
17
+ beni
18
+ benim
19
+ beri
20
+ beş
21
+ bile
22
+ bin
23
+ bir
24
+ birçok
25
+ biri
26
+ birkaç
27
+ birkez
28
+ birşey
29
+ birşeyi
30
+ biz
31
+ bize
32
+ bizden
33
+ bizi
34
+ bizim
35
+ böyle
36
+ böylece
37
+ bu
38
+ buna
39
+ bunda
40
+ bundan
41
+ bunlar
42
+ bunları
43
+ bunların
44
+ bunu
45
+ bunun
46
+ burada
47
+ çok
48
+ çünkü
49
+ da
50
+ daha
51
+ dahi
52
+ de
53
+ defa
54
+ değil
55
+ diğer
56
+ diye
57
+ doksan
58
+ dokuz
59
+ dolayı
60
+ dolayısıyla
61
+ dört
62
+ edecek
63
+ eden
64
+ ederek
65
+ edilecek
66
+ ediliyor
67
+ edilmesi
68
+ ediyor
69
+ eğer
70
+ elli
71
+ en
72
+ etmesi
73
+ etti
74
+ ettiği
75
+ ettiğini
76
+ gibi
77
+ göre
78
+ halen
79
+ hangi
80
+ hatta
81
+ hem
82
+ henüz
83
+ hep
84
+ hepsi
85
+ her
86
+ herhangi
87
+ herkesin
88
+ hiç
89
+ hiçbir
90
+ için
91
+ iki
92
+ ile
93
+ ilgili
94
+ ise
95
+ işte
96
+ itibaren
97
+ itibariyle
98
+ kadar
99
+ karşın
100
+ katrilyon
101
+ kendi
102
+ kendilerine
103
+ kendini
104
+ kendisi
105
+ kendisine
106
+ kendisini
107
+ kez
108
+ ki
109
+ kim
110
+ kimden
111
+ kime
112
+ kimi
113
+ kimse
114
+ kırk
115
+ milyar
116
+ milyon
117
+ mu
118
+
119
+
120
+ nasıl
121
+ ne
122
+ neden
123
+ nedenle
124
+ nerde
125
+ nerede
126
+ nereye
127
+ niye
128
+ niçin
129
+ o
130
+ olan
131
+ olarak
132
+ oldu
133
+ olduğu
134
+ olduğunu
135
+ olduklarını
136
+ olmadı
137
+ olmadığı
138
+ olmak
139
+ olması
140
+ olmayan
141
+ olmaz
142
+ olsa
143
+ olsun
144
+ olup
145
+ olur
146
+ olursa
147
+ oluyor
148
+ on
149
+ ona
150
+ ondan
151
+ onlar
152
+ onlardan
153
+ onları
154
+ onların
155
+ onu
156
+ onun
157
+ otuz
158
+ oysa
159
+ öyle
160
+ pek
161
+ rağmen
162
+ sadece
163
+ sanki
164
+ sekiz
165
+ seksen
166
+ sen
167
+ senden
168
+ seni
169
+ senin
170
+ siz
171
+ sizden
172
+ sizi
173
+ sizin
174
+ şey
175
+ şeyden
176
+ şeyi
177
+ şeyler
178
+ şöyle
179
+ şu
180
+ şuna
181
+ şunda
182
+ şundan
183
+ şunları
184
+ şunu
185
+ tarafından
186
+ trilyon
187
+ tüm
188
+ üç
189
+ üzere
190
+ var
191
+ vardı
192
+ ve
193
+ veya
194
+ ya
195
+ yani
196
+ yapacak
197
+ yapılan
198
+ yapılması
199
+ yapıyor
200
+ yapmak
201
+ yaptı
202
+ yaptığı
203
+ yaptığını
204
+ yaptıkları
205
+ yedi
206
+ yerine
207
+ yetmiş
208
+ yine
209
+ yirmi
210
+ yoksa
211
+ yüz
212
+ zaten
@@ -0,0 +1,29 @@
1
+ #
2
+ # This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
3
+ #
4
+ # Add entries to this file in order to override the statistical model in terms
5
+ # of segmentation, readings and part-of-speech tags. Notice that entries do
6
+ # not have weights since they are always used when found. This is by-design
7
+ # in order to maximize ease-of-use.
8
+ #
9
+ # Entries are defined using the following CSV format:
10
+ # <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
11
+ #
12
+ # Notice that a single half-width space separates tokens and readings, and
13
+ # that the number of tokens and readings must match exactly.
14
+ #
15
+ # Also notice that the behavior of multiple entries with the same <text> is undefined.
16
+ #
17
+ # Whitespace only lines are ignored. Comments are not allowed on entry lines.
18
+ #
19
+
20
+ # Custom segmentation for kanji compounds
21
+ 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
22
+ 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
23
+
24
+ # Custom segmentation for compound katakana
25
+ トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
26
+ ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
27
+
28
+ # Custom reading for former sumo wrestler
29
+ 朝青龍,朝青龍,アサショウリュウ,カスタム人名
@@ -0,0 +1,3813 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+
14
+ # This map converts alphabetic, numeric, and symbolic Unicode characters
15
+ # which are not among the 128 ASCII characters (the "Basic Latin" Unicode
16
+ # block) into their ASCII equivalents, if one exists.
17
+ #
18
+ # Characters from the following Unicode blocks are converted; however, only
19
+ # those characters with reasonable ASCII alternatives are converted:
20
+ #
21
+ # - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf
22
+ # - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf
23
+ # - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf
24
+ # - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf
25
+ # - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf
26
+ # - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf
27
+ # - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf
28
+ # - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf
29
+ # - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf
30
+ # - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf
31
+ # - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf
32
+ # - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf
33
+ # - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf
34
+ # - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf
35
+ # - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf
36
+ # - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf
37
+ #
38
+ # See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
39
+ #
40
+ # The set of character conversions supported by this map is a superset of
41
+ # those supported by the map represented by mapping-ISOLatin1Accent.txt.
42
+ #
43
+ # See the bottom of this file for the Perl script used to generate the contents
44
+ # of this file (without this header) from ASCIIFoldingFilter.java.
45
+
46
+
47
+ # Syntax:
48
+ # "source" => "target"
49
+ # "source".length() > 0 (source cannot be empty.)
50
+ # "target".length() >= 0 (target can be empty.)
51
+
52
+
53
+ # À [LATIN CAPITAL LETTER A WITH GRAVE]
54
+ "\u00C0" => "A"
55
+
56
+ # Á [LATIN CAPITAL LETTER A WITH ACUTE]
57
+ "\u00C1" => "A"
58
+
59
+ # Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
60
+ "\u00C2" => "A"
61
+
62
+ # Ã [LATIN CAPITAL LETTER A WITH TILDE]
63
+ "\u00C3" => "A"
64
+
65
+ # Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
66
+ "\u00C4" => "A"
67
+
68
+ # Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
69
+ "\u00C5" => "A"
70
+
71
+ # Ā [LATIN CAPITAL LETTER A WITH MACRON]
72
+ "\u0100" => "A"
73
+
74
+ # Ă [LATIN CAPITAL LETTER A WITH BREVE]
75
+ "\u0102" => "A"
76
+
77
+ # Ą [LATIN CAPITAL LETTER A WITH OGONEK]
78
+ "\u0104" => "A"
79
+
80
+ # Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
81
+ "\u018F" => "A"
82
+
83
+ # Ǎ [LATIN CAPITAL LETTER A WITH CARON]
84
+ "\u01CD" => "A"
85
+
86
+ # Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
87
+ "\u01DE" => "A"
88
+
89
+ # Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
90
+ "\u01E0" => "A"
91
+
92
+ # Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
93
+ "\u01FA" => "A"
94
+
95
+ # Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
96
+ "\u0200" => "A"
97
+
98
+ # Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
99
+ "\u0202" => "A"
100
+
101
+ # Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
102
+ "\u0226" => "A"
103
+
104
+ # Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
105
+ "\u023A" => "A"
106
+
107
+ # ᴀ [LATIN LETTER SMALL CAPITAL A]
108
+ "\u1D00" => "A"
109
+
110
+ # Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
111
+ "\u1E00" => "A"
112
+
113
+ # Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
114
+ "\u1EA0" => "A"
115
+
116
+ # Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
117
+ "\u1EA2" => "A"
118
+
119
+ # Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
120
+ "\u1EA4" => "A"
121
+
122
+ # Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
123
+ "\u1EA6" => "A"
124
+
125
+ # Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
126
+ "\u1EA8" => "A"
127
+
128
+ # Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
129
+ "\u1EAA" => "A"
130
+
131
+ # Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
132
+ "\u1EAC" => "A"
133
+
134
+ # Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
135
+ "\u1EAE" => "A"
136
+
137
+ # Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
138
+ "\u1EB0" => "A"
139
+
140
+ # Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
141
+ "\u1EB2" => "A"
142
+
143
+ # Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
144
+ "\u1EB4" => "A"
145
+
146
+ # Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
147
+ "\u1EB6" => "A"
148
+
149
+ # Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
150
+ "\u24B6" => "A"
151
+
152
+ # A [FULLWIDTH LATIN CAPITAL LETTER A]
153
+ "\uFF21" => "A"
154
+
155
+ # à [LATIN SMALL LETTER A WITH GRAVE]
156
+ "\u00E0" => "a"
157
+
158
+ # á [LATIN SMALL LETTER A WITH ACUTE]
159
+ "\u00E1" => "a"
160
+
161
+ # â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
162
+ "\u00E2" => "a"
163
+
164
+ # ã [LATIN SMALL LETTER A WITH TILDE]
165
+ "\u00E3" => "a"
166
+
167
+ # ä [LATIN SMALL LETTER A WITH DIAERESIS]
168
+ "\u00E4" => "a"
169
+
170
+ # å [LATIN SMALL LETTER A WITH RING ABOVE]
171
+ "\u00E5" => "a"
172
+
173
+ # ā [LATIN SMALL LETTER A WITH MACRON]
174
+ "\u0101" => "a"
175
+
176
+ # ă [LATIN SMALL LETTER A WITH BREVE]
177
+ "\u0103" => "a"
178
+
179
+ # ą [LATIN SMALL LETTER A WITH OGONEK]
180
+ "\u0105" => "a"
181
+
182
+ # ǎ [LATIN SMALL LETTER A WITH CARON]
183
+ "\u01CE" => "a"
184
+
185
+ # ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
186
+ "\u01DF" => "a"
187
+
188
+ # ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
189
+ "\u01E1" => "a"
190
+
191
+ # ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
192
+ "\u01FB" => "a"
193
+
194
+ # ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
195
+ "\u0201" => "a"
196
+
197
+ # ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
198
+ "\u0203" => "a"
199
+
200
+ # ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
201
+ "\u0227" => "a"
202
+
203
+ # ɐ [LATIN SMALL LETTER TURNED A]
204
+ "\u0250" => "a"
205
+
206
+ # ə [LATIN SMALL LETTER SCHWA]
207
+ "\u0259" => "a"
208
+
209
+ # ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
210
+ "\u025A" => "a"
211
+
212
+ # ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
213
+ "\u1D8F" => "a"
214
+
215
+ # ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
216
+ "\u1D95" => "a"
217
+
218
+ # ḁ [LATIN SMALL LETTER A WITH RING BELOW]
219
+ "\u1E01" => "a"
220
+
221
+ # ẚ [LATIN SMALL LETTER A WITH RIGHT HALF RING]
222
+ "\u1E9A" => "a"
223
+
224
+ # ạ [LATIN SMALL LETTER A WITH DOT BELOW]
225
+ "\u1EA1" => "a"
226
+
227
+ # ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
228
+ "\u1EA3" => "a"
229
+
230
+ # ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
231
+ "\u1EA5" => "a"
232
+
233
+ # ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
234
+ "\u1EA7" => "a"
235
+
236
+ # ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
237
+ "\u1EA9" => "a"
238
+
239
+ # ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
240
+ "\u1EAB" => "a"
241
+
242
+ # ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
243
+ "\u1EAD" => "a"
244
+
245
+ # ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
246
+ "\u1EAF" => "a"
247
+
248
+ # ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
249
+ "\u1EB1" => "a"
250
+
251
+ # ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
252
+ "\u1EB3" => "a"
253
+
254
+ # ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
255
+ "\u1EB5" => "a"
256
+
257
+ # ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
258
+ "\u1EB7" => "a"
259
+
260
+ # ₐ [LATIN SUBSCRIPT SMALL LETTER A]
261
+ "\u2090" => "a"
262
+
263
+ # ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
264
+ "\u2094" => "a"
265
+
266
+ # ⓐ [CIRCLED LATIN SMALL LETTER A]
267
+ "\u24D0" => "a"
268
+
269
+ # ⱥ [LATIN SMALL LETTER A WITH STROKE]
270
+ "\u2C65" => "a"
271
+
272
+ # Ɐ [LATIN CAPITAL LETTER TURNED A]
273
+ "\u2C6F" => "a"
274
+
275
+ # a [FULLWIDTH LATIN SMALL LETTER A]
276
+ "\uFF41" => "a"
277
+
278
+ # Ꜳ [LATIN CAPITAL LETTER AA]
279
+ "\uA732" => "AA"
280
+
281
+ # Æ [LATIN CAPITAL LETTER AE]
282
+ "\u00C6" => "AE"
283
+
284
+ # Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
285
+ "\u01E2" => "AE"
286
+
287
+ # Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
288
+ "\u01FC" => "AE"
289
+
290
+ # ᴁ [LATIN LETTER SMALL CAPITAL AE]
291
+ "\u1D01" => "AE"
292
+
293
+ # Ꜵ [LATIN CAPITAL LETTER AO]
294
+ "\uA734" => "AO"
295
+
296
+ # Ꜷ [LATIN CAPITAL LETTER AU]
297
+ "\uA736" => "AU"
298
+
299
+ # Ꜹ [LATIN CAPITAL LETTER AV]
300
+ "\uA738" => "AV"
301
+
302
+ # Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
303
+ "\uA73A" => "AV"
304
+
305
+ # Ꜽ [LATIN CAPITAL LETTER AY]
306
+ "\uA73C" => "AY"
307
+
308
+ # ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
309
+ "\u249C" => "(a)"
310
+
311
+ # ꜳ [LATIN SMALL LETTER AA]
312
+ "\uA733" => "aa"
313
+
314
+ # æ [LATIN SMALL LETTER AE]
315
+ "\u00E6" => "ae"
316
+
317
+ # ǣ [LATIN SMALL LETTER AE WITH MACRON]
318
+ "\u01E3" => "ae"
319
+
320
+ # ǽ [LATIN SMALL LETTER AE WITH ACUTE]
321
+ "\u01FD" => "ae"
322
+
323
+ # ᴂ [LATIN SMALL LETTER TURNED AE]
324
+ "\u1D02" => "ae"
325
+
326
+ # ꜵ [LATIN SMALL LETTER AO]
327
+ "\uA735" => "ao"
328
+
329
+ # ꜷ [LATIN SMALL LETTER AU]
330
+ "\uA737" => "au"
331
+
332
+ # ꜹ [LATIN SMALL LETTER AV]
333
+ "\uA739" => "av"
334
+
335
+ # ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
336
+ "\uA73B" => "av"
337
+
338
+ # ꜽ [LATIN SMALL LETTER AY]
339
+ "\uA73D" => "ay"
340
+
341
+ # Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
342
+ "\u0181" => "B"
343
+
344
+ # Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
345
+ "\u0182" => "B"
346
+
347
+ # Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
348
+ "\u0243" => "B"
349
+
350
+ # ʙ [LATIN LETTER SMALL CAPITAL B]
351
+ "\u0299" => "B"
352
+
353
+ # ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
354
+ "\u1D03" => "B"
355
+
356
+ # Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
357
+ "\u1E02" => "B"
358
+
359
+ # Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
360
+ "\u1E04" => "B"
361
+
362
+ # Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
363
+ "\u1E06" => "B"
364
+
365
+ # Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
366
+ "\u24B7" => "B"
367
+
368
+ # B [FULLWIDTH LATIN CAPITAL LETTER B]
369
+ "\uFF22" => "B"
370
+
371
+ # ƀ [LATIN SMALL LETTER B WITH STROKE]
372
+ "\u0180" => "b"
373
+
374
+ # ƃ [LATIN SMALL LETTER B WITH TOPBAR]
375
+ "\u0183" => "b"
376
+
377
+ # ɓ [LATIN SMALL LETTER B WITH HOOK]
378
+ "\u0253" => "b"
379
+
380
+ # ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
381
+ "\u1D6C" => "b"
382
+
383
+ # ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
384
+ "\u1D80" => "b"
385
+
386
+ # ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
387
+ "\u1E03" => "b"
388
+
389
+ # ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
390
+ "\u1E05" => "b"
391
+
392
+ # ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
393
+ "\u1E07" => "b"
394
+
395
+ # ⓑ [CIRCLED LATIN SMALL LETTER B]
396
+ "\u24D1" => "b"
397
+
398
+ # b [FULLWIDTH LATIN SMALL LETTER B]
399
+ "\uFF42" => "b"
400
+
401
+ # ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
402
+ "\u249D" => "(b)"
403
+
404
+ # Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
405
+ "\u00C7" => "C"
406
+
407
+ # Ć [LATIN CAPITAL LETTER C WITH ACUTE]
408
+ "\u0106" => "C"
409
+
410
+ # Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
411
+ "\u0108" => "C"
412
+
413
+ # Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
414
+ "\u010A" => "C"
415
+
416
+ # Č [LATIN CAPITAL LETTER C WITH CARON]
417
+ "\u010C" => "C"
418
+
419
+ # Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
420
+ "\u0187" => "C"
421
+
422
+ # Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
423
+ "\u023B" => "C"
424
+
425
+ # ʗ [LATIN LETTER STRETCHED C]
426
+ "\u0297" => "C"
427
+
428
+ # ᴄ [LATIN LETTER SMALL CAPITAL C]
429
+ "\u1D04" => "C"
430
+
431
+ # Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
432
+ "\u1E08" => "C"
433
+
434
+ # Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
435
+ "\u24B8" => "C"
436
+
437
+ # C [FULLWIDTH LATIN CAPITAL LETTER C]
438
+ "\uFF23" => "C"
439
+
440
+ # ç [LATIN SMALL LETTER C WITH CEDILLA]
441
+ "\u00E7" => "c"
442
+
443
+ # ć [LATIN SMALL LETTER C WITH ACUTE]
444
+ "\u0107" => "c"
445
+
446
+ # ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
447
+ "\u0109" => "c"
448
+
449
+ # ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
450
+ "\u010B" => "c"
451
+
452
+ # č [LATIN SMALL LETTER C WITH CARON]
453
+ "\u010D" => "c"
454
+
455
+ # ƈ [LATIN SMALL LETTER C WITH HOOK]
456
+ "\u0188" => "c"
457
+
458
+ # ȼ [LATIN SMALL LETTER C WITH STROKE]
459
+ "\u023C" => "c"
460
+
461
+ # ɕ [LATIN SMALL LETTER C WITH CURL]
462
+ "\u0255" => "c"
463
+
464
+ # ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
465
+ "\u1E09" => "c"
466
+
467
+ # ↄ [LATIN SMALL LETTER REVERSED C]
468
+ "\u2184" => "c"
469
+
470
+ # ⓒ [CIRCLED LATIN SMALL LETTER C]
471
+ "\u24D2" => "c"
472
+
473
+ # Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
474
+ "\uA73E" => "c"
475
+
476
+ # ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
477
+ "\uA73F" => "c"
478
+
479
+ # c [FULLWIDTH LATIN SMALL LETTER C]
480
+ "\uFF43" => "c"
481
+
482
+ # ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
483
+ "\u249E" => "(c)"
484
+
485
+ # Ð [LATIN CAPITAL LETTER ETH]
486
+ "\u00D0" => "D"
487
+
488
+ # Ď [LATIN CAPITAL LETTER D WITH CARON]
489
+ "\u010E" => "D"
490
+
491
+ # Đ [LATIN CAPITAL LETTER D WITH STROKE]
492
+ "\u0110" => "D"
493
+
494
+ # Ɖ [LATIN CAPITAL LETTER AFRICAN D]
495
+ "\u0189" => "D"
496
+
497
+ # Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
498
+ "\u018A" => "D"
499
+
500
+ # Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
501
+ "\u018B" => "D"
502
+
503
+ # ᴅ [LATIN LETTER SMALL CAPITAL D]
504
+ "\u1D05" => "D"
505
+
506
+ # ᴆ [LATIN LETTER SMALL CAPITAL ETH]
507
+ "\u1D06" => "D"
508
+
509
+ # Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
510
+ "\u1E0A" => "D"
511
+
512
+ # Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
513
+ "\u1E0C" => "D"
514
+
515
+ # Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
516
+ "\u1E0E" => "D"
517
+
518
+ # Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
519
+ "\u1E10" => "D"
520
+
521
+ # Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
522
+ "\u1E12" => "D"
523
+
524
+ # Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
525
+ "\u24B9" => "D"
526
+
527
+ # Ꝺ [LATIN CAPITAL LETTER INSULAR D]
528
+ "\uA779" => "D"
529
+
530
+ # D [FULLWIDTH LATIN CAPITAL LETTER D]
531
+ "\uFF24" => "D"
532
+
533
+ # ð [LATIN SMALL LETTER ETH]
534
+ "\u00F0" => "d"
535
+
536
+ # ď [LATIN SMALL LETTER D WITH CARON]
537
+ "\u010F" => "d"
538
+
539
+ # đ [LATIN SMALL LETTER D WITH STROKE]
540
+ "\u0111" => "d"
541
+
542
+ # ƌ [LATIN SMALL LETTER D WITH TOPBAR]
543
+ "\u018C" => "d"
544
+
545
+ # ȡ [LATIN SMALL LETTER D WITH CURL]
546
+ "\u0221" => "d"
547
+
548
+ # ɖ [LATIN SMALL LETTER D WITH TAIL]
549
+ "\u0256" => "d"
550
+
551
+ # ɗ [LATIN SMALL LETTER D WITH HOOK]
552
+ "\u0257" => "d"
553
+
554
+ # ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
555
+ "\u1D6D" => "d"
556
+
557
+ # ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
558
+ "\u1D81" => "d"
559
+
560
+ # ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
561
+ "\u1D91" => "d"
562
+
563
+ # ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
564
+ "\u1E0B" => "d"
565
+
566
+ # ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
567
+ "\u1E0D" => "d"
568
+
569
+ # ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
570
+ "\u1E0F" => "d"
571
+
572
+ # ḑ [LATIN SMALL LETTER D WITH CEDILLA]
573
+ "\u1E11" => "d"
574
+
575
+ # ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
576
+ "\u1E13" => "d"
577
+
578
+ # ⓓ [CIRCLED LATIN SMALL LETTER D]
579
+ "\u24D3" => "d"
580
+
581
+ # ꝺ [LATIN SMALL LETTER INSULAR D]
582
+ "\uA77A" => "d"
583
+
584
+ # ｄ [FULLWIDTH LATIN SMALL LETTER D]
585
+ "\uFF44" => "d"
586
+
587
+ # Ǆ [LATIN CAPITAL LETTER DZ WITH CARON]
588
+ "\u01C4" => "DZ"
589
+
590
+ # Ǳ [LATIN CAPITAL LETTER DZ]
591
+ "\u01F1" => "DZ"
592
+
593
+ # ǅ [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
594
+ "\u01C5" => "Dz"
595
+
596
+ # ǲ [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
597
+ "\u01F2" => "Dz"
598
+
599
+ # ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
600
+ "\u249F" => "(d)"
601
+
602
+ # ȸ [LATIN SMALL LETTER DB DIGRAPH]
603
+ "\u0238" => "db"
604
+
605
+ # ǆ [LATIN SMALL LETTER DZ WITH CARON]
606
+ "\u01C6" => "dz"
607
+
608
+ # ǳ [LATIN SMALL LETTER DZ]
609
+ "\u01F3" => "dz"
610
+
611
+ # ʣ [LATIN SMALL LETTER DZ DIGRAPH]
612
+ "\u02A3" => "dz"
613
+
614
+ # ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
615
+ "\u02A5" => "dz"
616
+
617
+ # È [LATIN CAPITAL LETTER E WITH GRAVE]
618
+ "\u00C8" => "E"
619
+
620
+ # É [LATIN CAPITAL LETTER E WITH ACUTE]
621
+ "\u00C9" => "E"
622
+
623
+ # Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
624
+ "\u00CA" => "E"
625
+
626
+ # Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
627
+ "\u00CB" => "E"
628
+
629
+ # Ē [LATIN CAPITAL LETTER E WITH MACRON]
630
+ "\u0112" => "E"
631
+
632
+ # Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
633
+ "\u0114" => "E"
634
+
635
+ # Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
636
+ "\u0116" => "E"
637
+
638
+ # Ę [LATIN CAPITAL LETTER E WITH OGONEK]
639
+ "\u0118" => "E"
640
+
641
+ # Ě [LATIN CAPITAL LETTER E WITH CARON]
642
+ "\u011A" => "E"
643
+
644
+ # Ǝ [LATIN CAPITAL LETTER REVERSED E]
645
+ "\u018E" => "E"
646
+
647
+ # Ɛ [LATIN CAPITAL LETTER OPEN E]
648
+ "\u0190" => "E"
649
+
650
+ # Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
651
+ "\u0204" => "E"
652
+
653
+ # Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
654
+ "\u0206" => "E"
655
+
656
+ # Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
657
+ "\u0228" => "E"
658
+
659
+ # Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
660
+ "\u0246" => "E"
661
+
662
+ # ᴇ [LATIN LETTER SMALL CAPITAL E]
663
+ "\u1D07" => "E"
664
+
665
+ # Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
666
+ "\u1E14" => "E"
667
+
668
+ # Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
669
+ "\u1E16" => "E"
670
+
671
+ # Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
672
+ "\u1E18" => "E"
673
+
674
+ # Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
675
+ "\u1E1A" => "E"
676
+
677
+ # Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
678
+ "\u1E1C" => "E"
679
+
680
+ # Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
681
+ "\u1EB8" => "E"
682
+
683
+ # Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
684
+ "\u1EBA" => "E"
685
+
686
+ # Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
687
+ "\u1EBC" => "E"
688
+
689
+ # Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
690
+ "\u1EBE" => "E"
691
+
692
+ # Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
693
+ "\u1EC0" => "E"
694
+
695
+ # Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
696
+ "\u1EC2" => "E"
697
+
698
+ # Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
699
+ "\u1EC4" => "E"
700
+
701
+ # Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
702
+ "\u1EC6" => "E"
703
+
704
+ # Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
705
+ "\u24BA" => "E"
706
+
707
+ # ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
708
+ "\u2C7B" => "E"
709
+
710
+ # Ｅ [FULLWIDTH LATIN CAPITAL LETTER E]
711
+ "\uFF25" => "E"
712
+
713
+ # è [LATIN SMALL LETTER E WITH GRAVE]
714
+ "\u00E8" => "e"
715
+
716
+ # é [LATIN SMALL LETTER E WITH ACUTE]
717
+ "\u00E9" => "e"
718
+
719
+ # ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
720
+ "\u00EA" => "e"
721
+
722
+ # ë [LATIN SMALL LETTER E WITH DIAERESIS]
723
+ "\u00EB" => "e"
724
+
725
+ # ē [LATIN SMALL LETTER E WITH MACRON]
726
+ "\u0113" => "e"
727
+
728
+ # ĕ [LATIN SMALL LETTER E WITH BREVE]
729
+ "\u0115" => "e"
730
+
731
+ # ė [LATIN SMALL LETTER E WITH DOT ABOVE]
732
+ "\u0117" => "e"
733
+
734
+ # ę [LATIN SMALL LETTER E WITH OGONEK]
735
+ "\u0119" => "e"
736
+
737
+ # ě [LATIN SMALL LETTER E WITH CARON]
738
+ "\u011B" => "e"
739
+
740
+ # ǝ [LATIN SMALL LETTER TURNED E]
741
+ "\u01DD" => "e"
742
+
743
+ # ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
744
+ "\u0205" => "e"
745
+
746
+ # ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
747
+ "\u0207" => "e"
748
+
749
+ # ȩ [LATIN SMALL LETTER E WITH CEDILLA]
750
+ "\u0229" => "e"
751
+
752
+ # ɇ [LATIN SMALL LETTER E WITH STROKE]
753
+ "\u0247" => "e"
754
+
755
+ # ɘ [LATIN SMALL LETTER REVERSED E]
756
+ "\u0258" => "e"
757
+
758
+ # ɛ [LATIN SMALL LETTER OPEN E]
759
+ "\u025B" => "e"
760
+
761
+ # ɜ [LATIN SMALL LETTER REVERSED OPEN E]
762
+ "\u025C" => "e"
763
+
764
+ # ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
765
+ "\u025D" => "e"
766
+
767
+ # ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
768
+ "\u025E" => "e"
769
+
770
+ # ʚ [LATIN SMALL LETTER CLOSED OPEN E]
771
+ "\u029A" => "e"
772
+
773
+ # ᴈ [LATIN SMALL LETTER TURNED OPEN E]
774
+ "\u1D08" => "e"
775
+
776
+ # ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
777
+ "\u1D92" => "e"
778
+
779
+ # ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
780
+ "\u1D93" => "e"
781
+
782
+ # ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
783
+ "\u1D94" => "e"
784
+
785
+ # ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
786
+ "\u1E15" => "e"
787
+
788
+ # ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
789
+ "\u1E17" => "e"
790
+
791
+ # ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
792
+ "\u1E19" => "e"
793
+
794
+ # ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
795
+ "\u1E1B" => "e"
796
+
797
+ # ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
798
+ "\u1E1D" => "e"
799
+
800
+ # ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
801
+ "\u1EB9" => "e"
802
+
803
+ # ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
804
+ "\u1EBB" => "e"
805
+
806
+ # ẽ [LATIN SMALL LETTER E WITH TILDE]
807
+ "\u1EBD" => "e"
808
+
809
+ # ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
810
+ "\u1EBF" => "e"
811
+
812
+ # ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
813
+ "\u1EC1" => "e"
814
+
815
+ # ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
816
+ "\u1EC3" => "e"
817
+
818
+ # ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
819
+ "\u1EC5" => "e"
820
+
821
+ # ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
822
+ "\u1EC7" => "e"
823
+
824
+ # ₑ [LATIN SUBSCRIPT SMALL LETTER E]
825
+ "\u2091" => "e"
826
+
827
+ # ⓔ [CIRCLED LATIN SMALL LETTER E]
828
+ "\u24D4" => "e"
829
+
830
+ # ⱸ [LATIN SMALL LETTER E WITH NOTCH]
831
+ "\u2C78" => "e"
832
+
833
+ # ｅ [FULLWIDTH LATIN SMALL LETTER E]
834
+ "\uFF45" => "e"
835
+
836
+ # ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
837
+ "\u24A0" => "(e)"
838
+
839
+ # Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
840
+ "\u0191" => "F"
841
+
842
+ # Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
843
+ "\u1E1E" => "F"
844
+
845
+ # Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
846
+ "\u24BB" => "F"
847
+
848
+ # ꜰ [LATIN LETTER SMALL CAPITAL F]
849
+ "\uA730" => "F"
850
+
851
+ # Ꝼ [LATIN CAPITAL LETTER INSULAR F]
852
+ "\uA77B" => "F"
853
+
854
+ # ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
855
+ "\uA7FB" => "F"
856
+
857
+ # Ｆ [FULLWIDTH LATIN CAPITAL LETTER F]
858
+ "\uFF26" => "F"
859
+
860
+ # ƒ [LATIN SMALL LETTER F WITH HOOK]
861
+ "\u0192" => "f"
862
+
863
+ # ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
864
+ "\u1D6E" => "f"
865
+
866
+ # ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
867
+ "\u1D82" => "f"
868
+
869
+ # ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
870
+ "\u1E1F" => "f"
871
+
872
+ # ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
873
+ "\u1E9B" => "f"
874
+
875
+ # ⓕ [CIRCLED LATIN SMALL LETTER F]
876
+ "\u24D5" => "f"
877
+
878
+ # ꝼ [LATIN SMALL LETTER INSULAR F]
879
+ "\uA77C" => "f"
880
+
881
+ # ｆ [FULLWIDTH LATIN SMALL LETTER F]
882
+ "\uFF46" => "f"
883
+
884
+ # ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
885
+ "\u24A1" => "(f)"
886
+
887
+ # ﬀ [LATIN SMALL LIGATURE FF]
888
+ "\uFB00" => "ff"
889
+
890
+ # ﬃ [LATIN SMALL LIGATURE FFI]
891
+ "\uFB03" => "ffi"
892
+
893
+ # ﬄ [LATIN SMALL LIGATURE FFL]
894
+ "\uFB04" => "ffl"
895
+
896
+ # ﬁ [LATIN SMALL LIGATURE FI]
897
+ "\uFB01" => "fi"
898
+
899
+ # ﬂ [LATIN SMALL LIGATURE FL]
900
+ "\uFB02" => "fl"
901
+
902
+ # Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
903
+ "\u011C" => "G"
904
+
905
+ # Ğ [LATIN CAPITAL LETTER G WITH BREVE]
906
+ "\u011E" => "G"
907
+
908
+ # Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
909
+ "\u0120" => "G"
910
+
911
+ # Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
912
+ "\u0122" => "G"
913
+
914
+ # Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
915
+ "\u0193" => "G"
916
+
917
+ # Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
918
+ "\u01E4" => "G"
919
+
920
+ # ǥ [LATIN SMALL LETTER G WITH STROKE]
921
+ "\u01E5" => "G"
922
+
923
+ # Ǧ [LATIN CAPITAL LETTER G WITH CARON]
924
+ "\u01E6" => "G"
925
+
926
+ # ǧ [LATIN SMALL LETTER G WITH CARON]
927
+ "\u01E7" => "G"
928
+
929
+ # Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
930
+ "\u01F4" => "G"
931
+
932
+ # ɢ [LATIN LETTER SMALL CAPITAL G]
933
+ "\u0262" => "G"
934
+
935
+ # ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
936
+ "\u029B" => "G"
937
+
938
+ # Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
939
+ "\u1E20" => "G"
940
+
941
+ # Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
942
+ "\u24BC" => "G"
943
+
944
+ # Ᵹ [LATIN CAPITAL LETTER INSULAR G]
945
+ "\uA77D" => "G"
946
+
947
+ # Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
948
+ "\uA77E" => "G"
949
+
950
+ # Ｇ [FULLWIDTH LATIN CAPITAL LETTER G]
951
+ "\uFF27" => "G"
952
+
953
+ # ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
954
+ "\u011D" => "g"
955
+
956
+ # ğ [LATIN SMALL LETTER G WITH BREVE]
957
+ "\u011F" => "g"
958
+
959
+ # ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
960
+ "\u0121" => "g"
961
+
962
+ # ģ [LATIN SMALL LETTER G WITH CEDILLA]
963
+ "\u0123" => "g"
964
+
965
+ # ǵ [LATIN SMALL LETTER G WITH ACUTE]
966
+ "\u01F5" => "g"
967
+
968
+ # ɠ [LATIN SMALL LETTER G WITH HOOK]
969
+ "\u0260" => "g"
970
+
971
+ # ɡ [LATIN SMALL LETTER SCRIPT G]
972
+ "\u0261" => "g"
973
+
974
+ # ᵷ [LATIN SMALL LETTER TURNED G]
975
+ "\u1D77" => "g"
976
+
977
+ # ᵹ [LATIN SMALL LETTER INSULAR G]
978
+ "\u1D79" => "g"
979
+
980
+ # ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
981
+ "\u1D83" => "g"
982
+
983
+ # ḡ [LATIN SMALL LETTER G WITH MACRON]
984
+ "\u1E21" => "g"
985
+
986
+ # ⓖ [CIRCLED LATIN SMALL LETTER G]
987
+ "\u24D6" => "g"
988
+
989
+ # ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
990
+ "\uA77F" => "g"
991
+
992
+ # ｇ [FULLWIDTH LATIN SMALL LETTER G]
993
+ "\uFF47" => "g"
994
+
995
+ # ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
996
+ "\u24A2" => "(g)"
997
+
998
+ # Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
999
+ "\u0124" => "H"
1000
+
1001
+ # Ħ [LATIN CAPITAL LETTER H WITH STROKE]
1002
+ "\u0126" => "H"
1003
+
1004
+ # Ȟ [LATIN CAPITAL LETTER H WITH CARON]
1005
+ "\u021E" => "H"
1006
+
1007
+ # ʜ [LATIN LETTER SMALL CAPITAL H]
1008
+ "\u029C" => "H"
1009
+
1010
+ # Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
1011
+ "\u1E22" => "H"
1012
+
1013
+ # Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
1014
+ "\u1E24" => "H"
1015
+
1016
+ # Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
1017
+ "\u1E26" => "H"
1018
+
1019
+ # Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
1020
+ "\u1E28" => "H"
1021
+
1022
+ # Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
1023
+ "\u1E2A" => "H"
1024
+
1025
+ # Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
1026
+ "\u24BD" => "H"
1027
+
1028
+ # Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
1029
+ "\u2C67" => "H"
1030
+
1031
+ # Ⱶ [LATIN CAPITAL LETTER HALF H]
1032
+ "\u2C75" => "H"
1033
+
1034
+ # Ｈ [FULLWIDTH LATIN CAPITAL LETTER H]
1035
+ "\uFF28" => "H"
1036
+
1037
+ # ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
1038
+ "\u0125" => "h"
1039
+
1040
+ # ħ [LATIN SMALL LETTER H WITH STROKE]
1041
+ "\u0127" => "h"
1042
+
1043
+ # ȟ [LATIN SMALL LETTER H WITH CARON]
1044
+ "\u021F" => "h"
1045
+
1046
+ # ɥ [LATIN SMALL LETTER TURNED H]
1047
+ "\u0265" => "h"
1048
+
1049
+ # ɦ [LATIN SMALL LETTER H WITH HOOK]
1050
+ "\u0266" => "h"
1051
+
1052
+ # ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
1053
+ "\u02AE" => "h"
1054
+
1055
+ # ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
1056
+ "\u02AF" => "h"
1057
+
1058
+ # ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
1059
+ "\u1E23" => "h"
1060
+
1061
+ # ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
1062
+ "\u1E25" => "h"
1063
+
1064
+ # ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
1065
+ "\u1E27" => "h"
1066
+
1067
+ # ḩ [LATIN SMALL LETTER H WITH CEDILLA]
1068
+ "\u1E29" => "h"
1069
+
1070
+ # ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
1071
+ "\u1E2B" => "h"
1072
+
1073
+ # ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
1074
+ "\u1E96" => "h"
1075
+
1076
+ # ⓗ [CIRCLED LATIN SMALL LETTER H]
1077
+ "\u24D7" => "h"
1078
+
1079
+ # ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
1080
+ "\u2C68" => "h"
1081
+
1082
+ # ⱶ [LATIN SMALL LETTER HALF H]
1083
+ "\u2C76" => "h"
1084
+
1085
+ # ｈ [FULLWIDTH LATIN SMALL LETTER H]
1086
+ "\uFF48" => "h"
1087
+
1088
+ # Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
1089
+ "\u01F6" => "HV"
1090
+
1091
+ # ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
1092
+ "\u24A3" => "(h)"
1093
+
1094
+ # ƕ [LATIN SMALL LETTER HV]
1095
+ "\u0195" => "hv"
1096
+
1097
+ # Ì [LATIN CAPITAL LETTER I WITH GRAVE]
1098
+ "\u00CC" => "I"
1099
+
1100
+ # Í [LATIN CAPITAL LETTER I WITH ACUTE]
1101
+ "\u00CD" => "I"
1102
+
1103
+ # Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
1104
+ "\u00CE" => "I"
1105
+
1106
+ # Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
1107
+ "\u00CF" => "I"
1108
+
1109
+ # Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
1110
+ "\u0128" => "I"
1111
+
1112
+ # Ī [LATIN CAPITAL LETTER I WITH MACRON]
1113
+ "\u012A" => "I"
1114
+
1115
+ # Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
1116
+ "\u012C" => "I"
1117
+
1118
+ # Į [LATIN CAPITAL LETTER I WITH OGONEK]
1119
+ "\u012E" => "I"
1120
+
1121
+ # İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
1122
+ "\u0130" => "I"
1123
+
1124
+ # Ɩ [LATIN CAPITAL LETTER IOTA]
1125
+ "\u0196" => "I"
1126
+
1127
+ # Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
1128
+ "\u0197" => "I"
1129
+
1130
+ # Ǐ [LATIN CAPITAL LETTER I WITH CARON]
1131
+ "\u01CF" => "I"
1132
+
1133
+ # Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
1134
+ "\u0208" => "I"
1135
+
1136
+ # Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
1137
+ "\u020A" => "I"
1138
+
1139
+ # ɪ [LATIN LETTER SMALL CAPITAL I]
1140
+ "\u026A" => "I"
1141
+
1142
+ # ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
1143
+ "\u1D7B" => "I"
1144
+
1145
+ # Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
1146
+ "\u1E2C" => "I"
1147
+
1148
+ # Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
1149
+ "\u1E2E" => "I"
1150
+
1151
+ # Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
1152
+ "\u1EC8" => "I"
1153
+
1154
+ # Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
1155
+ "\u1ECA" => "I"
1156
+
1157
+ # Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
1158
+ "\u24BE" => "I"
1159
+
1160
+ # ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
1161
+ "\uA7FE" => "I"
1162
+
1163
+ # Ｉ [FULLWIDTH LATIN CAPITAL LETTER I]
1164
+ "\uFF29" => "I"
1165
+
1166
+ # ì [LATIN SMALL LETTER I WITH GRAVE]
1167
+ "\u00EC" => "i"
1168
+
1169
+ # í [LATIN SMALL LETTER I WITH ACUTE]
1170
+ "\u00ED" => "i"
1171
+
1172
+ # î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
1173
+ "\u00EE" => "i"
1174
+
1175
+ # ï [LATIN SMALL LETTER I WITH DIAERESIS]
1176
+ "\u00EF" => "i"
1177
+
1178
+ # ĩ [LATIN SMALL LETTER I WITH TILDE]
1179
+ "\u0129" => "i"
1180
+
1181
+ # ī [LATIN SMALL LETTER I WITH MACRON]
1182
+ "\u012B" => "i"
1183
+
1184
+ # ĭ [LATIN SMALL LETTER I WITH BREVE]
1185
+ "\u012D" => "i"
1186
+
1187
+ # į [LATIN SMALL LETTER I WITH OGONEK]
1188
+ "\u012F" => "i"
1189
+
1190
+ # ı [LATIN SMALL LETTER DOTLESS I]
1191
+ "\u0131" => "i"
1192
+
1193
+ # ǐ [LATIN SMALL LETTER I WITH CARON]
1194
+ "\u01D0" => "i"
1195
+
1196
+ # ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
1197
+ "\u0209" => "i"
1198
+
1199
+ # ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
1200
+ "\u020B" => "i"
1201
+
1202
+ # ɨ [LATIN SMALL LETTER I WITH STROKE]
1203
+ "\u0268" => "i"
1204
+
1205
+ # ᴉ [LATIN SMALL LETTER TURNED I]
1206
+ "\u1D09" => "i"
1207
+
1208
+ # ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
1209
+ "\u1D62" => "i"
1210
+
1211
+ # ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
1212
+ "\u1D7C" => "i"
1213
+
1214
+ # ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
1215
+ "\u1D96" => "i"
1216
+
1217
+ # ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
1218
+ "\u1E2D" => "i"
1219
+
1220
+ # ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
1221
+ "\u1E2F" => "i"
1222
+
1223
+ # ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
1224
+ "\u1EC9" => "i"
1225
+
1226
+ # ị [LATIN SMALL LETTER I WITH DOT BELOW]
1227
+ "\u1ECB" => "i"
1228
+
1229
+ # ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
1230
+ "\u2071" => "i"
1231
+
1232
+ # ⓘ [CIRCLED LATIN SMALL LETTER I]
1233
+ "\u24D8" => "i"
1234
+
1235
+ # ｉ [FULLWIDTH LATIN SMALL LETTER I]
1236
+ "\uFF49" => "i"
1237
+
1238
+ # Ĳ [LATIN CAPITAL LIGATURE IJ]
1239
+ "\u0132" => "IJ"
1240
+
1241
+ # ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
1242
+ "\u24A4" => "(i)"
1243
+
1244
+ # ĳ [LATIN SMALL LIGATURE IJ]
1245
+ "\u0133" => "ij"
1246
+
1247
+ # Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
1248
+ "\u0134" => "J"
1249
+
1250
+ # Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
1251
+ "\u0248" => "J"
1252
+
1253
+ # ᴊ [LATIN LETTER SMALL CAPITAL J]
1254
+ "\u1D0A" => "J"
1255
+
1256
+ # Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
1257
+ "\u24BF" => "J"
1258
+
1259
+ # Ｊ [FULLWIDTH LATIN CAPITAL LETTER J]
1260
+ "\uFF2A" => "J"
1261
+
1262
+ # ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
1263
+ "\u0135" => "j"
1264
+
1265
+ # ǰ [LATIN SMALL LETTER J WITH CARON]
1266
+ "\u01F0" => "j"
1267
+
1268
+ # ȷ [LATIN SMALL LETTER DOTLESS J]
1269
+ "\u0237" => "j"
1270
+
1271
+ # ɉ [LATIN SMALL LETTER J WITH STROKE]
1272
+ "\u0249" => "j"
1273
+
1274
+ # ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
1275
+ "\u025F" => "j"
1276
+
1277
+ # ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
1278
+ "\u0284" => "j"
1279
+
1280
+ # ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
1281
+ "\u029D" => "j"
1282
+
1283
+ # ⓙ [CIRCLED LATIN SMALL LETTER J]
1284
+ "\u24D9" => "j"
1285
+
1286
+ # ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
1287
+ "\u2C7C" => "j"
1288
+
1289
+ # ｊ [FULLWIDTH LATIN SMALL LETTER J]
1290
+ "\uFF4A" => "j"
1291
+
1292
+ # ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
1293
+ "\u24A5" => "(j)"
1294
+
1295
+ # Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
1296
+ "\u0136" => "K"
1297
+
1298
+ # Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
1299
+ "\u0198" => "K"
1300
+
1301
+ # Ǩ [LATIN CAPITAL LETTER K WITH CARON]
1302
+ "\u01E8" => "K"
1303
+
1304
+ # ᴋ [LATIN LETTER SMALL CAPITAL K]
1305
+ "\u1D0B" => "K"
1306
+
1307
+ # Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
1308
+ "\u1E30" => "K"
1309
+
1310
+ # Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
1311
+ "\u1E32" => "K"
1312
+
1313
+ # Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
1314
+ "\u1E34" => "K"
1315
+
1316
+ # Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
1317
+ "\u24C0" => "K"
1318
+
1319
+ # Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
1320
+ "\u2C69" => "K"
1321
+
1322
+ # Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
1323
+ "\uA740" => "K"
1324
+
1325
+ # Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
1326
+ "\uA742" => "K"
1327
+
1328
+ # Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
1329
+ "\uA744" => "K"
1330
+
1331
+ # Ｋ [FULLWIDTH LATIN CAPITAL LETTER K]
1332
+ "\uFF2B" => "K"
1333
+
1334
+ # ķ [LATIN SMALL LETTER K WITH CEDILLA]
1335
+ "\u0137" => "k"
1336
+
1337
+ # ƙ [LATIN SMALL LETTER K WITH HOOK]
1338
+ "\u0199" => "k"
1339
+
1340
+ # ǩ [LATIN SMALL LETTER K WITH CARON]
1341
+ "\u01E9" => "k"
1342
+
1343
+ # ʞ [LATIN SMALL LETTER TURNED K]
1344
+ "\u029E" => "k"
1345
+
1346
+ # ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
1347
+ "\u1D84" => "k"
1348
+
1349
+ # ḱ [LATIN SMALL LETTER K WITH ACUTE]
1350
+ "\u1E31" => "k"
1351
+
1352
+ # ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
1353
+ "\u1E33" => "k"
1354
+
1355
+ # ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
1356
+ "\u1E35" => "k"
1357
+
1358
+ # ⓚ [CIRCLED LATIN SMALL LETTER K]
1359
+ "\u24DA" => "k"
1360
+
1361
+ # ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
1362
+ "\u2C6A" => "k"
1363
+
1364
+ # ꝁ [LATIN SMALL LETTER K WITH STROKE]
1365
+ "\uA741" => "k"
1366
+
1367
+ # ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
1368
+ "\uA743" => "k"
1369
+
1370
+ # ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
1371
+ "\uA745" => "k"
1372
+
1373
+ # ｋ [FULLWIDTH LATIN SMALL LETTER K]
1374
+ "\uFF4B" => "k"
1375
+
1376
+ # ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
1377
+ "\u24A6" => "(k)"
1378
+
1379
+ # Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
1380
+ "\u0139" => "L"
1381
+
1382
+ # Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
1383
+ "\u013B" => "L"
1384
+
1385
+ # Ľ [LATIN CAPITAL LETTER L WITH CARON]
1386
+ "\u013D" => "L"
1387
+
1388
+ # Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
1389
+ "\u013F" => "L"
1390
+
1391
+ # Ł [LATIN CAPITAL LETTER L WITH STROKE]
1392
+ "\u0141" => "L"
1393
+
1394
+ # Ƚ [LATIN CAPITAL LETTER L WITH BAR]
1395
+ "\u023D" => "L"
1396
+
1397
+ # ʟ [LATIN LETTER SMALL CAPITAL L]
1398
+ "\u029F" => "L"
1399
+
1400
+ # ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
1401
+ "\u1D0C" => "L"
1402
+
1403
+ # Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
1404
+ "\u1E36" => "L"
1405
+
1406
+ # Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
1407
+ "\u1E38" => "L"
1408
+
1409
+ # Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
1410
+ "\u1E3A" => "L"
1411
+
1412
+ # Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
1413
+ "\u1E3C" => "L"
1414
+
1415
+ # Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
1416
+ "\u24C1" => "L"
1417
+
1418
+ # Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
1419
+ "\u2C60" => "L"
1420
+
1421
+ # Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
1422
+ "\u2C62" => "L"
1423
+
1424
+ # Ꝇ [LATIN CAPITAL LETTER BROKEN L]
1425
+ "\uA746" => "L"
1426
+
1427
+ # Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
1428
+ "\uA748" => "L"
1429
+
1430
+ # Ꞁ [LATIN CAPITAL LETTER TURNED L]
1431
+ "\uA780" => "L"
1432
+
1433
+ # Ｌ [FULLWIDTH LATIN CAPITAL LETTER L]
1434
+ "\uFF2C" => "L"
1435
+
1436
+ # ĺ [LATIN SMALL LETTER L WITH ACUTE]
1437
+ "\u013A" => "l"
1438
+
1439
+ # ļ [LATIN SMALL LETTER L WITH CEDILLA]
1440
+ "\u013C" => "l"
1441
+
1442
+ # ľ [LATIN SMALL LETTER L WITH CARON]
1443
+ "\u013E" => "l"
1444
+
1445
+ # ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
1446
+ "\u0140" => "l"
1447
+
1448
+ # ł [LATIN SMALL LETTER L WITH STROKE]
1449
+ "\u0142" => "l"
1450
+
1451
+ # ƚ [LATIN SMALL LETTER L WITH BAR]
1452
+ "\u019A" => "l"
1453
+
1454
+ # ȴ [LATIN SMALL LETTER L WITH CURL]
1455
+ "\u0234" => "l"
1456
+
1457
+ # ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
1458
+ "\u026B" => "l"
1459
+
1460
+ # ɬ [LATIN SMALL LETTER L WITH BELT]
1461
+ "\u026C" => "l"
1462
+
1463
+ # ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
1464
+ "\u026D" => "l"
1465
+
1466
+ # ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
1467
+ "\u1D85" => "l"
1468
+
1469
+ # ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
1470
+ "\u1E37" => "l"
1471
+
1472
+ # ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
1473
+ "\u1E39" => "l"
1474
+
1475
+ # ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
1476
+ "\u1E3B" => "l"
1477
+
1478
+ # ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
1479
+ "\u1E3D" => "l"
1480
+
1481
+ # ⓛ [CIRCLED LATIN SMALL LETTER L]
1482
+ "\u24DB" => "l"
1483
+
1484
+ # ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
1485
+ "\u2C61" => "l"
1486
+
1487
+ # ꝇ [LATIN SMALL LETTER BROKEN L]
1488
+ "\uA747" => "l"
1489
+
1490
+ # ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
1491
+ "\uA749" => "l"
1492
+
1493
+ # ꞁ [LATIN SMALL LETTER TURNED L]
1494
+ "\uA781" => "l"
1495
+
1496
+ # ｌ [FULLWIDTH LATIN SMALL LETTER L]
1497
+ "\uFF4C" => "l"
1498
+
1499
+ # Ǉ [LATIN CAPITAL LETTER LJ]
1500
+ "\u01C7" => "LJ"
1501
+
1502
+ # Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
1503
+ "\u1EFA" => "LL"
1504
+
1505
+ # ǈ [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
1506
+ "\u01C8" => "Lj"
1507
+
1508
+ # ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
1509
+ "\u24A7" => "(l)"
1510
+
1511
+ # ǉ [LATIN SMALL LETTER LJ]
1512
+ "\u01C9" => "lj"
1513
+
1514
+ # ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
1515
+ "\u1EFB" => "ll"
1516
+
1517
+ # ʪ [LATIN SMALL LETTER LS DIGRAPH]
1518
+ "\u02AA" => "ls"
1519
+
1520
+ # ʫ [LATIN SMALL LETTER LZ DIGRAPH]
1521
+ "\u02AB" => "lz"
1522
+
1523
+ # Ɯ [LATIN CAPITAL LETTER TURNED M]
1524
+ "\u019C" => "M"
1525
+
1526
+ # ᴍ [LATIN LETTER SMALL CAPITAL M]
1527
+ "\u1D0D" => "M"
1528
+
1529
+ # Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
1530
+ "\u1E3E" => "M"
1531
+
1532
+ # Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
1533
+ "\u1E40" => "M"
1534
+
1535
+ # Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
1536
+ "\u1E42" => "M"
1537
+
1538
+ # Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
1539
+ "\u24C2" => "M"
1540
+
1541
+ # Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
1542
+ "\u2C6E" => "M"
1543
+
1544
+ # ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
1545
+ "\uA7FD" => "M"
1546
+
1547
+ # ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
1548
+ "\uA7FF" => "M"
1549
+
1550
+ # Ｍ [FULLWIDTH LATIN CAPITAL LETTER M]
1551
+ "\uFF2D" => "M"
1552
+
1553
+ # ɯ [LATIN SMALL LETTER TURNED M]
1554
+ "\u026F" => "m"
1555
+
1556
+ # ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
1557
+ "\u0270" => "m"
1558
+
1559
+ # ɱ [LATIN SMALL LETTER M WITH HOOK]
1560
+ "\u0271" => "m"
1561
+
1562
+ # ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
1563
+ "\u1D6F" => "m"
1564
+
1565
+ # ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
1566
+ "\u1D86" => "m"
1567
+
1568
+ # ḿ [LATIN SMALL LETTER M WITH ACUTE]
1569
+ "\u1E3F" => "m"
1570
+
1571
+ # ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
1572
+ "\u1E41" => "m"
1573
+
1574
+ # ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
1575
+ "\u1E43" => "m"
1576
+
1577
+ # ⓜ [CIRCLED LATIN SMALL LETTER M]
1578
+ "\u24DC" => "m"
1579
+
1580
+ # ｍ [FULLWIDTH LATIN SMALL LETTER M]
1581
+ "\uFF4D" => "m"
1582
+
1583
+ # ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
1584
+ "\u24A8" => "(m)"
1585
+
1586
+ # Ñ [LATIN CAPITAL LETTER N WITH TILDE]
1587
+ "\u00D1" => "N"
1588
+
1589
+ # Ń [LATIN CAPITAL LETTER N WITH ACUTE]
1590
+ "\u0143" => "N"
1591
+
1592
+ # Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
1593
+ "\u0145" => "N"
1594
+
1595
+ # Ň [LATIN CAPITAL LETTER N WITH CARON]
1596
+ "\u0147" => "N"
1597
+
1598
+ # Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
1599
+ "\u014A" => "N"
1600
+
1601
+ # Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
1602
+ "\u019D" => "N"
1603
+
1604
+ # Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
1605
+ "\u01F8" => "N"
1606
+
1607
+ # Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
1608
+ "\u0220" => "N"
1609
+
1610
+ # ɴ [LATIN LETTER SMALL CAPITAL N]
1611
+ "\u0274" => "N"
1612
+
1613
+ # ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
1614
+ "\u1D0E" => "N"
1615
+
1616
+ # Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
1617
+ "\u1E44" => "N"
1618
+
1619
+ # Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
1620
+ "\u1E46" => "N"
1621
+
1622
+ # Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
1623
+ "\u1E48" => "N"
1624
+
1625
+ # Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
1626
+ "\u1E4A" => "N"
1627
+
1628
+ # Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
1629
+ "\u24C3" => "N"
1630
+
1631
+ # Ｎ [FULLWIDTH LATIN CAPITAL LETTER N]
1632
+ "\uFF2E" => "N"
1633
+
1634
+ # ñ [LATIN SMALL LETTER N WITH TILDE]
1635
+ "\u00F1" => "n"
1636
+
1637
+ # ń [LATIN SMALL LETTER N WITH ACUTE]
1638
+ "\u0144" => "n"
1639
+
1640
+ # ņ [LATIN SMALL LETTER N WITH CEDILLA]
1641
+ "\u0146" => "n"
1642
+
1643
+ # ň [LATIN SMALL LETTER N WITH CARON]
1644
+ "\u0148" => "n"
1645
+
1646
+ # ŉ [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
1647
+ "\u0149" => "n"
1648
+
1649
+ # ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
1650
+ "\u014B" => "n"
1651
+
1652
+ # ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
1653
+ "\u019E" => "n"
1654
+
1655
+ # ǹ [LATIN SMALL LETTER N WITH GRAVE]
1656
+ "\u01F9" => "n"
1657
+
1658
+ # ȵ [LATIN SMALL LETTER N WITH CURL]
1659
+ "\u0235" => "n"
1660
+
1661
+ # ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
1662
+ "\u0272" => "n"
1663
+
1664
+ # ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
1665
+ "\u0273" => "n"
1666
+
1667
+ # ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
1668
+ "\u1D70" => "n"
1669
+
1670
+ # ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
1671
+ "\u1D87" => "n"
1672
+
1673
+ # ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
1674
+ "\u1E45" => "n"
1675
+
1676
+ # ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
1677
+ "\u1E47" => "n"
1678
+
1679
+ # ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
1680
+ "\u1E49" => "n"
1681
+
1682
+ # ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
1683
+ "\u1E4B" => "n"
1684
+
1685
+ # ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
1686
+ "\u207F" => "n"
1687
+
1688
+ # ⓝ [CIRCLED LATIN SMALL LETTER N]
1689
+ "\u24DD" => "n"
1690
+
1691
+ # ｎ [FULLWIDTH LATIN SMALL LETTER N]
1692
+ "\uFF4E" => "n"
1693
+
1694
+ # Ǌ [LATIN CAPITAL LETTER NJ]
1695
+ "\u01CA" => "NJ"
1696
+
1697
+ # ǋ [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
1698
+ "\u01CB" => "Nj"
1699
+
1700
+ # ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
1701
+ "\u24A9" => "(n)"
1702
+
1703
+ # ǌ [LATIN SMALL LETTER NJ]
1704
+ "\u01CC" => "nj"
1705
+
1706
+ # Ò [LATIN CAPITAL LETTER O WITH GRAVE]
1707
+ "\u00D2" => "O"
1708
+
1709
+ # Ó [LATIN CAPITAL LETTER O WITH ACUTE]
1710
+ "\u00D3" => "O"
1711
+
1712
+ # Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
1713
+ "\u00D4" => "O"
1714
+
1715
+ # Õ [LATIN CAPITAL LETTER O WITH TILDE]
1716
+ "\u00D5" => "O"
1717
+
1718
+ # Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
1719
+ "\u00D6" => "O"
1720
+
1721
+ # Ø [LATIN CAPITAL LETTER O WITH STROKE]
1722
+ "\u00D8" => "O"
1723
+
1724
+ # Ō [LATIN CAPITAL LETTER O WITH MACRON]
1725
+ "\u014C" => "O"
1726
+
1727
+ # Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
1728
+ "\u014E" => "O"
1729
+
1730
+ # Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
1731
+ "\u0150" => "O"
1732
+
1733
+ # Ɔ [LATIN CAPITAL LETTER OPEN O]
1734
+ "\u0186" => "O"
1735
+
1736
+ # Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
1737
+ "\u019F" => "O"
1738
+
1739
+ # Ơ [LATIN CAPITAL LETTER O WITH HORN]
1740
+ "\u01A0" => "O"
1741
+
1742
+ # Ǒ [LATIN CAPITAL LETTER O WITH CARON]
1743
+ "\u01D1" => "O"
1744
+
1745
+ # Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
1746
+ "\u01EA" => "O"
1747
+
1748
+ # Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
1749
+ "\u01EC" => "O"
1750
+
1751
+ # Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
1752
+ "\u01FE" => "O"
1753
+
1754
+ # Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
1755
+ "\u020C" => "O"
1756
+
1757
+ # Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
1758
+ "\u020E" => "O"
1759
+
1760
+ # Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
1761
+ "\u022A" => "O"
1762
+
1763
+ # Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
1764
+ "\u022C" => "O"
1765
+
1766
+ # Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
1767
+ "\u022E" => "O"
1768
+
1769
+ # Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
1770
+ "\u0230" => "O"
1771
+
1772
+ # ᴏ [LATIN LETTER SMALL CAPITAL O]
1773
+ "\u1D0F" => "O"
1774
+
1775
+ # ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
1776
+ "\u1D10" => "O"
1777
+
1778
+ # Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
1779
+ "\u1E4C" => "O"
1780
+
1781
+ # Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
1782
+ "\u1E4E" => "O"
1783
+
1784
+ # Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
1785
+ "\u1E50" => "O"
1786
+
1787
+ # Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
1788
+ "\u1E52" => "O"
1789
+
1790
+ # Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
1791
+ "\u1ECC" => "O"
1792
+
1793
+ # Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
1794
+ "\u1ECE" => "O"
1795
+
1796
+ # Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
1797
+ "\u1ED0" => "O"
1798
+
1799
+ # Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
1800
+ "\u1ED2" => "O"
1801
+
1802
+ # Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
1803
+ "\u1ED4" => "O"
1804
+
1805
+ # Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
1806
+ "\u1ED6" => "O"
1807
+
1808
+ # Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
1809
+ "\u1ED8" => "O"
1810
+
1811
+ # Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
1812
+ "\u1EDA" => "O"
1813
+
1814
+ # Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
1815
+ "\u1EDC" => "O"
1816
+
1817
+ # Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
1818
+ "\u1EDE" => "O"
1819
+
1820
+ # Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
1821
+ "\u1EE0" => "O"
1822
+
1823
+ # Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
1824
+ "\u1EE2" => "O"
1825
+
1826
+ # Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
1827
+ "\u24C4" => "O"
1828
+
1829
+ # Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
1830
+ "\uA74A" => "O"
1831
+
1832
+ # Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
1833
+ "\uA74C" => "O"
1834
+
1835
+ # Ｏ [FULLWIDTH LATIN CAPITAL LETTER O]
1836
+ "\uFF2F" => "O"
1837
+
1838
+ # ò [LATIN SMALL LETTER O WITH GRAVE]
1839
+ "\u00F2" => "o"
1840
+
1841
+ # ó [LATIN SMALL LETTER O WITH ACUTE]
1842
+ "\u00F3" => "o"
1843
+
1844
+ # ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
1845
+ "\u00F4" => "o"
1846
+
1847
+ # õ [LATIN SMALL LETTER O WITH TILDE]
1848
+ "\u00F5" => "o"
1849
+
1850
+ # ö [LATIN SMALL LETTER O WITH DIAERESIS]
1851
+ "\u00F6" => "o"
1852
+
1853
+ # ø [LATIN SMALL LETTER O WITH STROKE]
1854
+ "\u00F8" => "o"
1855
+
1856
+ # ō [LATIN SMALL LETTER O WITH MACRON]
1857
+ "\u014D" => "o"
1858
+
1859
+ # ŏ [LATIN SMALL LETTER O WITH BREVE]
1860
+ "\u014F" => "o"
1861
+
1862
+ # ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
1863
+ "\u0151" => "o"
1864
+
1865
+ # ơ [LATIN SMALL LETTER O WITH HORN]
1866
+ "\u01A1" => "o"
1867
+
1868
+ # ǒ [LATIN SMALL LETTER O WITH CARON]
1869
+ "\u01D2" => "o"
1870
+
1871
+ # ǫ [LATIN SMALL LETTER O WITH OGONEK]
1872
+ "\u01EB" => "o"
1873
+
1874
+ # ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
1875
+ "\u01ED" => "o"
1876
+
1877
+ # ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
1878
+ "\u01FF" => "o"
1879
+
1880
+ # ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
1881
+ "\u020D" => "o"
1882
+
1883
+ # ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
1884
+ "\u020F" => "o"
1885
+
1886
+ # ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
1887
+ "\u022B" => "o"
1888
+
1889
+ # ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
1890
+ "\u022D" => "o"
1891
+
1892
+ # ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
1893
+ "\u022F" => "o"
1894
+
1895
+ # ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
1896
+ "\u0231" => "o"
1897
+
1898
+ # ɔ [LATIN SMALL LETTER OPEN O]
1899
+ "\u0254" => "o"
1900
+
1901
+ # ɵ [LATIN SMALL LETTER BARRED O]
1902
+ "\u0275" => "o"
1903
+
1904
+ # ᴖ [LATIN SMALL LETTER TOP HALF O]
1905
+ "\u1D16" => "o"
1906
+
1907
+ # ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
1908
+ "\u1D17" => "o"
1909
+
1910
+ # ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
1911
+ "\u1D97" => "o"
1912
+
1913
+ # ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
1914
+ "\u1E4D" => "o"
1915
+
1916
+ # ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
1917
+ "\u1E4F" => "o"
1918
+
1919
+ # ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
1920
+ "\u1E51" => "o"
1921
+
1922
+ # ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
1923
+ "\u1E53" => "o"
1924
+
1925
+ # ọ [LATIN SMALL LETTER O WITH DOT BELOW]
1926
+ "\u1ECD" => "o"
1927
+
1928
+ # ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
1929
+ "\u1ECF" => "o"
1930
+
1931
+ # ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
1932
+ "\u1ED1" => "o"
1933
+
1934
+ # ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
1935
+ "\u1ED3" => "o"
1936
+
1937
+ # ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
1938
+ "\u1ED5" => "o"
1939
+
1940
+ # ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
1941
+ "\u1ED7" => "o"
1942
+
1943
+ # ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
1944
+ "\u1ED9" => "o"
1945
+
1946
+ # ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
1947
+ "\u1EDB" => "o"
1948
+
1949
+ # ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
1950
+ "\u1EDD" => "o"
1951
+
1952
+ # ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
1953
+ "\u1EDF" => "o"
1954
+
1955
+ # ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
1956
+ "\u1EE1" => "o"
1957
+
1958
+ # ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
1959
+ "\u1EE3" => "o"
1960
+
1961
+ # ₒ [LATIN SUBSCRIPT SMALL LETTER O]
1962
+ "\u2092" => "o"
1963
+
1964
+ # ⓞ [CIRCLED LATIN SMALL LETTER O]
1965
+ "\u24DE" => "o"
1966
+
1967
+ # ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
1968
+ "\u2C7A" => "o"
1969
+
1970
+ # ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
1971
+ "\uA74B" => "o"
1972
+
1973
+ # ꝍ [LATIN SMALL LETTER O WITH LOOP]
1974
+ "\uA74D" => "o"
1975
+
1976
+ # ｏ [FULLWIDTH LATIN SMALL LETTER O]
1977
+ "\uFF4F" => "o"
1978
+
1979
+ # Œ [LATIN CAPITAL LIGATURE OE]
1980
+ "\u0152" => "OE"
1981
+
1982
+ # ɶ [LATIN LETTER SMALL CAPITAL OE]
1983
+ "\u0276" => "OE"
1984
+
1985
+ # Ꝏ [LATIN CAPITAL LETTER OO]
1986
+ "\uA74E" => "OO"
1987
+
1988
+ # Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
1989
+ "\u0222" => "OU"
1990
+
1991
+ # ᴕ [LATIN LETTER SMALL CAPITAL OU]
1992
+ "\u1D15" => "OU"
1993
+
1994
+ # ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
1995
+ "\u24AA" => "(o)"
1996
+
1997
+ # œ [LATIN SMALL LIGATURE OE]
1998
+ "\u0153" => "oe"
1999
+
2000
+ # ᴔ [LATIN SMALL LETTER TURNED OE]
2001
+ "\u1D14" => "oe"
2002
+
2003
+ # ꝏ [LATIN SMALL LETTER OO]
2004
+ "\uA74F" => "oo"
2005
+
2006
+ # ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
2007
+ "\u0223" => "ou"
2008
+
2009
+ # Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
2010
+ "\u01A4" => "P"
2011
+
2012
+ # ᴘ [LATIN LETTER SMALL CAPITAL P]
2013
+ "\u1D18" => "P"
2014
+
2015
+ # Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
2016
+ "\u1E54" => "P"
2017
+
2018
+ # Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
2019
+ "\u1E56" => "P"
2020
+
2021
+ # Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
2022
+ "\u24C5" => "P"
2023
+
2024
+ # Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
2025
+ "\u2C63" => "P"
2026
+
2027
+ # Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
2028
+ "\uA750" => "P"
2029
+
2030
+ # Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
2031
+ "\uA752" => "P"
2032
+
2033
+ # Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
2034
+ "\uA754" => "P"
2035
+
2036
+ # Ｐ [FULLWIDTH LATIN CAPITAL LETTER P]
2037
+ "\uFF30" => "P"
2038
+
2039
+ # ƥ [LATIN SMALL LETTER P WITH HOOK]
2040
+ "\u01A5" => "p"
2041
+
2042
+ # ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
2043
+ "\u1D71" => "p"
2044
+
2045
+ # ᵽ [LATIN SMALL LETTER P WITH STROKE]
2046
+ "\u1D7D" => "p"
2047
+
2048
+ # ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
2049
+ "\u1D88" => "p"
2050
+
2051
+ # ṕ [LATIN SMALL LETTER P WITH ACUTE]
2052
+ "\u1E55" => "p"
2053
+
2054
+ # ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
2055
+ "\u1E57" => "p"
2056
+
2057
+ # ⓟ [CIRCLED LATIN SMALL LETTER P]
2058
+ "\u24DF" => "p"
2059
+
2060
+ # ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
2061
+ "\uA751" => "p"
2062
+
2063
+ # ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
2064
+ "\uA753" => "p"
2065
+
2066
+ # ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
2067
+ "\uA755" => "p"
2068
+
2069
+ # ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
2070
+ "\uA7FC" => "p"
2071
+
2072
+ # ｐ [FULLWIDTH LATIN SMALL LETTER P]
2073
+ "\uFF50" => "p"
2074
+
2075
+ # ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
2076
+ "\u24AB" => "(p)"
2077
+
2078
+ # Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
2079
+ "\u024A" => "Q"
2080
+
2081
+ # Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
2082
+ "\u24C6" => "Q"
2083
+
2084
+ # Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
2085
+ "\uA756" => "Q"
2086
+
2087
+ # Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
2088
+ "\uA758" => "Q"
2089
+
2090
+ # Ｑ [FULLWIDTH LATIN CAPITAL LETTER Q]
2091
+ "\uFF31" => "Q"
2092
+
2093
+ # ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
2094
+ "\u0138" => "q"
2095
+
2096
+ # ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
2097
+ "\u024B" => "q"
2098
+
2099
+ # ʠ [LATIN SMALL LETTER Q WITH HOOK]
2100
+ "\u02A0" => "q"
2101
+
2102
+ # ⓠ [CIRCLED LATIN SMALL LETTER Q]
2103
+ "\u24E0" => "q"
2104
+
2105
+ # ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
2106
+ "\uA757" => "q"
2107
+
2108
+ # ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
2109
+ "\uA759" => "q"
2110
+
2111
+ # ｑ [FULLWIDTH LATIN SMALL LETTER Q]
2112
+ "\uFF51" => "q"
2113
+
2114
+ # ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
2115
+ "\u24AC" => "(q)"
2116
+
2117
+ # ȹ [LATIN SMALL LETTER QP DIGRAPH]
2118
+ "\u0239" => "qp"
2119
+
2120
+ # Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
2121
+ "\u0154" => "R"
2122
+
2123
+ # Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
2124
+ "\u0156" => "R"
2125
+
2126
+ # Ř [LATIN CAPITAL LETTER R WITH CARON]
2127
+ "\u0158" => "R"
2128
+
2129
+ # Ȑ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
2130
+ "\u0210" => "R"
2131
+
2132
+ # Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
2133
+ "\u0212" => "R"
2134
+
2135
+ # Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
2136
+ "\u024C" => "R"
2137
+
2138
+ # ʀ [LATIN LETTER SMALL CAPITAL R]
2139
+ "\u0280" => "R"
2140
+
2141
+ # ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
2142
+ "\u0281" => "R"
2143
+
2144
+ # ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
2145
+ "\u1D19" => "R"
2146
+
2147
+ # ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
2148
+ "\u1D1A" => "R"
2149
+
2150
+ # Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
2151
+ "\u1E58" => "R"
2152
+
2153
+ # Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
2154
+ "\u1E5A" => "R"
2155
+
2156
+ # Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
2157
+ "\u1E5C" => "R"
2158
+
2159
+ # Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
2160
+ "\u1E5E" => "R"
2161
+
2162
+ # Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
2163
+ "\u24C7" => "R"
2164
+
2165
+ # Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
2166
+ "\u2C64" => "R"
2167
+
2168
+ # Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
2169
+ "\uA75A" => "R"
2170
+
2171
+ # Ꞃ [LATIN CAPITAL LETTER INSULAR R]
2172
+ "\uA782" => "R"
2173
+
2174
+ # Ｒ [FULLWIDTH LATIN CAPITAL LETTER R]
2175
+ "\uFF32" => "R"
2176
+
2177
+ # ŕ [LATIN SMALL LETTER R WITH ACUTE]
2178
+ "\u0155" => "r"
2179
+
2180
+ # ŗ [LATIN SMALL LETTER R WITH CEDILLA]
2181
+ "\u0157" => "r"
2182
+
2183
+ # ř [LATIN SMALL LETTER R WITH CARON]
2184
+ "\u0159" => "r"
2185
+
2186
+ # ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
2187
+ "\u0211" => "r"
2188
+
2189
+ # ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
2190
+ "\u0213" => "r"
2191
+
2192
+ # ɍ [LATIN SMALL LETTER R WITH STROKE]
2193
+ "\u024D" => "r"
2194
+
2195
+ # ɼ [LATIN SMALL LETTER R WITH LONG LEG]
2196
+ "\u027C" => "r"
2197
+
2198
+ # ɽ [LATIN SMALL LETTER R WITH TAIL]
2199
+ "\u027D" => "r"
2200
+
2201
+ # ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
2202
+ "\u027E" => "r"
2203
+
2204
+ # ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
2205
+ "\u027F" => "r"
2206
+
2207
+ # ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
2208
+ "\u1D63" => "r"
2209
+
2210
+ # ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
2211
+ "\u1D72" => "r"
2212
+
2213
+ # ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
2214
+ "\u1D73" => "r"
2215
+
2216
+ # ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
2217
+ "\u1D89" => "r"
2218
+
2219
+ # ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
2220
+ "\u1E59" => "r"
2221
+
2222
+ # ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
2223
+ "\u1E5B" => "r"
2224
+
2225
+ # ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
2226
+ "\u1E5D" => "r"
2227
+
2228
+ # ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
2229
+ "\u1E5F" => "r"
2230
+
2231
+ # ⓡ [CIRCLED LATIN SMALL LETTER R]
2232
+ "\u24E1" => "r"
2233
+
2234
+ # ꝛ [LATIN SMALL LETTER R ROTUNDA]
2235
+ "\uA75B" => "r"
2236
+
2237
+ # ꞃ [LATIN SMALL LETTER INSULAR R]
2238
+ "\uA783" => "r"
2239
+
2240
+ # ｒ [FULLWIDTH LATIN SMALL LETTER R]
2241
+ "\uFF52" => "r"
2242
+
2243
+ # ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
2244
+ "\u24AD" => "(r)"
2245
+
2246
+ # Ś [LATIN CAPITAL LETTER S WITH ACUTE]
2247
+ "\u015A" => "S"
2248
+
2249
+ # Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
2250
+ "\u015C" => "S"
2251
+
2252
+ # Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
2253
+ "\u015E" => "S"
2254
+
2255
+ # Š [LATIN CAPITAL LETTER S WITH CARON]
2256
+ "\u0160" => "S"
2257
+
2258
+ # Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
2259
+ "\u0218" => "S"
2260
+
2261
+ # Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
2262
+ "\u1E60" => "S"
2263
+
2264
+ # Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
2265
+ "\u1E62" => "S"
2266
+
2267
+ # Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
2268
+ "\u1E64" => "S"
2269
+
2270
+ # Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
2271
+ "\u1E66" => "S"
2272
+
2273
+ # Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
2274
+ "\u1E68" => "S"
2275
+
2276
+ # Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
2277
+ "\u24C8" => "S"
2278
+
2279
+ # ꜱ [LATIN LETTER SMALL CAPITAL S]
2280
+ "\uA731" => "S"
2281
+
2282
+ # ꞅ [LATIN SMALL LETTER INSULAR S]
2283
+ "\uA785" => "S"
2284
+
2285
+ # Ｓ [FULLWIDTH LATIN CAPITAL LETTER S]
2286
+ "\uFF33" => "S"
2287
+
2288
+ # ś [LATIN SMALL LETTER S WITH ACUTE]
2289
+ "\u015B" => "s"
2290
+
2291
+ # ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
2292
+ "\u015D" => "s"
2293
+
2294
+ # ş [LATIN SMALL LETTER S WITH CEDILLA]
2295
+ "\u015F" => "s"
2296
+
2297
+ # š [LATIN SMALL LETTER S WITH CARON]
2298
+ "\u0161" => "s"
2299
+
2300
+ # ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
2301
+ "\u017F" => "s"
2302
+
2303
+ # ș [LATIN SMALL LETTER S WITH COMMA BELOW]
2304
+ "\u0219" => "s"
2305
+
2306
+ # ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
2307
+ "\u023F" => "s"
2308
+
2309
+ # ʂ [LATIN SMALL LETTER S WITH HOOK]
2310
+ "\u0282" => "s"
2311
+
2312
+ # ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
2313
+ "\u1D74" => "s"
2314
+
2315
+ # ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
2316
+ "\u1D8A" => "s"
2317
+
2318
+ # ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
2319
+ "\u1E61" => "s"
2320
+
2321
+ # ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
2322
+ "\u1E63" => "s"
2323
+
2324
+ # ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
2325
+ "\u1E65" => "s"
2326
+
2327
+ # ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
2328
+ "\u1E67" => "s"
2329
+
2330
+ # ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
2331
+ "\u1E69" => "s"
2332
+
2333
+ # ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
2334
+ "\u1E9C" => "s"
2335
+
2336
+ # ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
2337
+ "\u1E9D" => "s"
2338
+
2339
+ # ⓢ [CIRCLED LATIN SMALL LETTER S]
2340
+ "\u24E2" => "s"
2341
+
2342
+ # Ꞅ [LATIN CAPITAL LETTER INSULAR S]
2343
+ "\uA784" => "s"
2344
+
2345
+ # ｓ [FULLWIDTH LATIN SMALL LETTER S]
2346
+ "\uFF53" => "s"
2347
+
2348
+ # ẞ [LATIN CAPITAL LETTER SHARP S]
2349
+ "\u1E9E" => "SS"
2350
+
2351
+ # ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
2352
+ "\u24AE" => "(s)"
2353
+
2354
+ # ß [LATIN SMALL LETTER SHARP S]
2355
+ "\u00DF" => "ss"
2356
+
2357
+ # ﬆ [LATIN SMALL LIGATURE ST]
2358
+ "\uFB06" => "st"
2359
+
2360
+ # Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
2361
+ "\u0162" => "T"
2362
+
2363
+ # Ť [LATIN CAPITAL LETTER T WITH CARON]
2364
+ "\u0164" => "T"
2365
+
2366
+ # Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
2367
+ "\u0166" => "T"
2368
+
2369
+ # Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
2370
+ "\u01AC" => "T"
2371
+
2372
+ # Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
2373
+ "\u01AE" => "T"
2374
+
2375
+ # Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
2376
+ "\u021A" => "T"
2377
+
2378
+ # Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
2379
+ "\u023E" => "T"
2380
+
2381
+ # ᴛ [LATIN LETTER SMALL CAPITAL T]
2382
+ "\u1D1B" => "T"
2383
+
2384
+ # Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
2385
+ "\u1E6A" => "T"
2386
+
2387
+ # Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
2388
+ "\u1E6C" => "T"
2389
+
2390
+ # Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
2391
+ "\u1E6E" => "T"
2392
+
2393
+ # Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
2394
+ "\u1E70" => "T"
2395
+
2396
+ # Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
2397
+ "\u24C9" => "T"
2398
+
2399
+ # Ꞇ [LATIN CAPITAL LETTER INSULAR T]
2400
+ "\uA786" => "T"
2401
+
2402
+ # Ｔ [FULLWIDTH LATIN CAPITAL LETTER T]
2403
+ "\uFF34" => "T"
2404
+
2405
+ # ţ [LATIN SMALL LETTER T WITH CEDILLA]
2406
+ "\u0163" => "t"
2407
+
2408
+ # ť [LATIN SMALL LETTER T WITH CARON]
2409
+ "\u0165" => "t"
2410
+
2411
+ # ŧ [LATIN SMALL LETTER T WITH STROKE]
2412
+ "\u0167" => "t"
2413
+
2414
+ # ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
2415
+ "\u01AB" => "t"
2416
+
2417
+ # ƭ [LATIN SMALL LETTER T WITH HOOK]
2418
+ "\u01AD" => "t"
2419
+
2420
+ # ț [LATIN SMALL LETTER T WITH COMMA BELOW]
2421
+ "\u021B" => "t"
2422
+
2423
+ # ȶ [LATIN SMALL LETTER T WITH CURL]
2424
+ "\u0236" => "t"
2425
+
2426
+ # ʇ [LATIN SMALL LETTER TURNED T]
2427
+ "\u0287" => "t"
2428
+
2429
+ # ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
2430
+ "\u0288" => "t"
2431
+
2432
+ # ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
2433
+ "\u1D75" => "t"
2434
+
2435
+ # ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
2436
+ "\u1E6B" => "t"
2437
+
2438
+ # ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
2439
+ "\u1E6D" => "t"
2440
+
2441
+ # ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
2442
+ "\u1E6F" => "t"
2443
+
2444
+ # ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
2445
+ "\u1E71" => "t"
2446
+
2447
+ # ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
2448
+ "\u1E97" => "t"
2449
+
2450
+ # ⓣ [CIRCLED LATIN SMALL LETTER T]
2451
+ "\u24E3" => "t"
2452
+
2453
+ # ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
2454
+ "\u2C66" => "t"
2455
+
2456
+ # ｔ [FULLWIDTH LATIN SMALL LETTER T]
2457
+ "\uFF54" => "t"
2458
+
2459
+ # Þ [LATIN CAPITAL LETTER THORN]
2460
+ "\u00DE" => "TH"
2461
+
2462
+ # Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
2463
+ "\uA766" => "TH"
2464
+
2465
+ # Ꜩ [LATIN CAPITAL LETTER TZ]
2466
+ "\uA728" => "TZ"
2467
+
2468
+ # ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
2469
+ "\u24AF" => "(t)"
2470
+
2471
+ # ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
2472
+ "\u02A8" => "tc"
2473
+
2474
+ # þ [LATIN SMALL LETTER THORN]
2475
+ "\u00FE" => "th"
2476
+
2477
+ # ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
2478
+ "\u1D7A" => "th"
2479
+
2480
+ # ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
2481
+ "\uA767" => "th"
2482
+
2483
+ # ʦ [LATIN SMALL LETTER TS DIGRAPH]
2484
+ "\u02A6" => "ts"
2485
+
2486
+ # ꜩ [LATIN SMALL LETTER TZ]
2487
+ "\uA729" => "tz"
2488
+
2489
+ # Ù [LATIN CAPITAL LETTER U WITH GRAVE]
2490
+ "\u00D9" => "U"
2491
+
2492
+ # Ú [LATIN CAPITAL LETTER U WITH ACUTE]
2493
+ "\u00DA" => "U"
2494
+
2495
+ # Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
2496
+ "\u00DB" => "U"
2497
+
2498
+ # Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
2499
+ "\u00DC" => "U"
2500
+
2501
+ # Ũ [LATIN CAPITAL LETTER U WITH TILDE]
2502
+ "\u0168" => "U"
2503
+
2504
+ # Ū [LATIN CAPITAL LETTER U WITH MACRON]
2505
+ "\u016A" => "U"
2506
+
2507
+ # Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
2508
+ "\u016C" => "U"
2509
+
2510
+ # Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
2511
+ "\u016E" => "U"
2512
+
2513
+ # Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
2514
+ "\u0170" => "U"
2515
+
2516
+ # Ų [LATIN CAPITAL LETTER U WITH OGONEK]
2517
+ "\u0172" => "U"
2518
+
2519
+ # Ư [LATIN CAPITAL LETTER U WITH HORN]
2520
+ "\u01AF" => "U"
2521
+
2522
+ # Ǔ [LATIN CAPITAL LETTER U WITH CARON]
2523
+ "\u01D3" => "U"
2524
+
2525
+ # Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
2526
+ "\u01D5" => "U"
2527
+
2528
+ # Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
2529
+ "\u01D7" => "U"
2530
+
2531
+ # Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
2532
+ "\u01D9" => "U"
2533
+
2534
+ # Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
2535
+ "\u01DB" => "U"
2536
+
2537
+ # Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
2538
+ "\u0214" => "U"
2539
+
2540
+ # Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
2541
+ "\u0216" => "U"
2542
+
2543
+ # Ʉ [LATIN CAPITAL LETTER U BAR]
2544
+ "\u0244" => "U"
2545
+
2546
+ # ᴜ [LATIN LETTER SMALL CAPITAL U]
2547
+ "\u1D1C" => "U"
2548
+
2549
+ # ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
2550
+ "\u1D7E" => "U"
2551
+
2552
+ # Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
2553
+ "\u1E72" => "U"
2554
+
2555
+ # Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
2556
+ "\u1E74" => "U"
2557
+
2558
+ # Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
2559
+ "\u1E76" => "U"
2560
+
2561
+ # Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
2562
+ "\u1E78" => "U"
2563
+
2564
+ # Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
2565
+ "\u1E7A" => "U"
2566
+
2567
+ # Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
2568
+ "\u1EE4" => "U"
2569
+
2570
+ # Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
2571
+ "\u1EE6" => "U"
2572
+
2573
+ # Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
2574
+ "\u1EE8" => "U"
2575
+
2576
+ # Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
2577
+ "\u1EEA" => "U"
2578
+
2579
+ # Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
2580
+ "\u1EEC" => "U"
2581
+
2582
+ # Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
2583
+ "\u1EEE" => "U"
2584
+
2585
+ # Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
2586
+ "\u1EF0" => "U"
2587
+
2588
+ # Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
2589
+ "\u24CA" => "U"
2590
+
2591
+ # Ｕ [FULLWIDTH LATIN CAPITAL LETTER U]
2592
+ "\uFF35" => "U"
2593
+
2594
+ # ù [LATIN SMALL LETTER U WITH GRAVE]
2595
+ "\u00F9" => "u"
2596
+
2597
+ # ú [LATIN SMALL LETTER U WITH ACUTE]
2598
+ "\u00FA" => "u"
2599
+
2600
+ # û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
2601
+ "\u00FB" => "u"
2602
+
2603
+ # ü [LATIN SMALL LETTER U WITH DIAERESIS]
2604
+ "\u00FC" => "u"
2605
+
2606
+ # ũ [LATIN SMALL LETTER U WITH TILDE]
2607
+ "\u0169" => "u"
2608
+
2609
+ # ū [LATIN SMALL LETTER U WITH MACRON]
2610
+ "\u016B" => "u"
2611
+
2612
+ # ŭ [LATIN SMALL LETTER U WITH BREVE]
2613
+ "\u016D" => "u"
2614
+
2615
+ # ů [LATIN SMALL LETTER U WITH RING ABOVE]
2616
+ "\u016F" => "u"
2617
+
2618
+ # ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
2619
+ "\u0171" => "u"
2620
+
2621
+ # ų [LATIN SMALL LETTER U WITH OGONEK]
2622
+ "\u0173" => "u"
2623
+
2624
+ # ư [LATIN SMALL LETTER U WITH HORN]
2625
+ "\u01B0" => "u"
2626
+
2627
+ # ǔ [LATIN SMALL LETTER U WITH CARON]
2628
+ "\u01D4" => "u"
2629
+
2630
+ # ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
2631
+ "\u01D6" => "u"
2632
+
2633
+ # ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
2634
+ "\u01D8" => "u"
2635
+
2636
+ # ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
2637
+ "\u01DA" => "u"
2638
+
2639
+ # ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
2640
+ "\u01DC" => "u"
2641
+
2642
+ # ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
2643
+ "\u0215" => "u"
2644
+
2645
+ # ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
2646
+ "\u0217" => "u"
2647
+
2648
+ # ʉ [LATIN SMALL LETTER U BAR]
2649
+ "\u0289" => "u"
2650
+
2651
+ # ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
2652
+ "\u1D64" => "u"
2653
+
2654
+ # ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
2655
+ "\u1D99" => "u"
2656
+
2657
+ # ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
2658
+ "\u1E73" => "u"
2659
+
2660
+ # ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
2661
+ "\u1E75" => "u"
2662
+
2663
+ # ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
2664
+ "\u1E77" => "u"
2665
+
2666
+ # ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
2667
+ "\u1E79" => "u"
2668
+
2669
+ # ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
2670
+ "\u1E7B" => "u"
2671
+
2672
+ # ụ [LATIN SMALL LETTER U WITH DOT BELOW]
2673
+ "\u1EE5" => "u"
2674
+
2675
+ # ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
2676
+ "\u1EE7" => "u"
2677
+
2678
+ # ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
2679
+ "\u1EE9" => "u"
2680
+
2681
+ # ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
2682
+ "\u1EEB" => "u"
2683
+
2684
+ # ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
2685
+ "\u1EED" => "u"
2686
+
2687
+ # ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
2688
+ "\u1EEF" => "u"
2689
+
2690
+ # ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
2691
+ "\u1EF1" => "u"
2692
+
2693
+ # ⓤ [CIRCLED LATIN SMALL LETTER U]
2694
+ "\u24E4" => "u"
2695
+
2696
+ # ｕ [FULLWIDTH LATIN SMALL LETTER U]
2697
+ "\uFF55" => "u"
2698
+
2699
+ # ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
2700
+ "\u24B0" => "(u)"
2701
+
2702
+ # ᵫ [LATIN SMALL LETTER UE]
2703
+ "\u1D6B" => "ue"
2704
+
2705
+ # Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
2706
+ "\u01B2" => "V"
2707
+
2708
+ # Ʌ [LATIN CAPITAL LETTER TURNED V]
2709
+ "\u0245" => "V"
2710
+
2711
+ # ᴠ [LATIN LETTER SMALL CAPITAL V]
2712
+ "\u1D20" => "V"
2713
+
2714
+ # Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
2715
+ "\u1E7C" => "V"
2716
+
2717
+ # Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
2718
+ "\u1E7E" => "V"
2719
+
2720
+ # Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
2721
+ "\u1EFC" => "V"
2722
+
2723
+ # Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
2724
+ "\u24CB" => "V"
2725
+
2726
+ # Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
2727
+ "\uA75E" => "V"
2728
+
2729
+ # Ꝩ [LATIN CAPITAL LETTER VEND]
2730
+ "\uA768" => "V"
2731
+
2732
+ # Ｖ [FULLWIDTH LATIN CAPITAL LETTER V]
2733
+ "\uFF36" => "V"
2734
+
2735
+ # ʋ [LATIN SMALL LETTER V WITH HOOK]
2736
+ "\u028B" => "v"
2737
+
2738
+ # ʌ [LATIN SMALL LETTER TURNED V]
2739
+ "\u028C" => "v"
2740
+
2741
+ # ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
2742
+ "\u1D65" => "v"
2743
+
2744
+ # ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
2745
+ "\u1D8C" => "v"
2746
+
2747
+ # ṽ [LATIN SMALL LETTER V WITH TILDE]
2748
+ "\u1E7D" => "v"
2749
+
2750
+ # ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
2751
+ "\u1E7F" => "v"
2752
+
2753
+ # ⓥ [CIRCLED LATIN SMALL LETTER V]
2754
+ "\u24E5" => "v"
2755
+
2756
+ # ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
2757
+ "\u2C71" => "v"
2758
+
2759
+ # ⱴ [LATIN SMALL LETTER V WITH CURL]
2760
+ "\u2C74" => "v"
2761
+
2762
+ # ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
2763
+ "\uA75F" => "v"
2764
+
2765
+ # ｖ [FULLWIDTH LATIN SMALL LETTER V]
2766
+ "\uFF56" => "v"
2767
+
2768
+ # Ꝡ [LATIN CAPITAL LETTER VY]
2769
+ "\uA760" => "VY"
2770
+
2771
+ # ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
2772
+ "\u24B1" => "(v)"
2773
+
2774
+ # ꝡ [LATIN SMALL LETTER VY]
2775
+ "\uA761" => "vy"
2776
+
2777
+ # Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
2778
+ "\u0174" => "W"
2779
+
2780
+ # Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
2781
+ "\u01F7" => "W"
2782
+
2783
+ # ᴡ [LATIN LETTER SMALL CAPITAL W]
2784
+ "\u1D21" => "W"
2785
+
2786
+ # Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
2787
+ "\u1E80" => "W"
2788
+
2789
+ # Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
2790
+ "\u1E82" => "W"
2791
+
2792
+ # Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
2793
+ "\u1E84" => "W"
2794
+
2795
+ # Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
2796
+ "\u1E86" => "W"
2797
+
2798
+ # Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
2799
+ "\u1E88" => "W"
2800
+
2801
+ # Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
2802
+ "\u24CC" => "W"
2803
+
2804
+ # Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
2805
+ "\u2C72" => "W"
2806
+
2807
+ # Ｗ [FULLWIDTH LATIN CAPITAL LETTER W]
2808
+ "\uFF37" => "W"
2809
+
2810
+ # ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
2811
+ "\u0175" => "w"
2812
+
2813
+ # ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
2814
+ "\u01BF" => "w"
2815
+
2816
+ # ʍ [LATIN SMALL LETTER TURNED W]
2817
+ "\u028D" => "w"
2818
+
2819
+ # ẁ [LATIN SMALL LETTER W WITH GRAVE]
2820
+ "\u1E81" => "w"
2821
+
2822
+ # ẃ [LATIN SMALL LETTER W WITH ACUTE]
2823
+ "\u1E83" => "w"
2824
+
2825
+ # ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
2826
+ "\u1E85" => "w"
2827
+
2828
+ # ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
2829
+ "\u1E87" => "w"
2830
+
2831
+ # ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
2832
+ "\u1E89" => "w"
2833
+
2834
+ # ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
2835
+ "\u1E98" => "w"
2836
+
2837
+ # ⓦ [CIRCLED LATIN SMALL LETTER W]
2838
+ "\u24E6" => "w"
2839
+
2840
+ # ⱳ [LATIN SMALL LETTER W WITH HOOK]
2841
+ "\u2C73" => "w"
2842
+
2843
+ # ｗ [FULLWIDTH LATIN SMALL LETTER W]
2844
+ "\uFF57" => "w"
2845
+
2846
+ # ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
2847
+ "\u24B2" => "(w)"
2848
+
2849
+ # Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
2850
+ "\u1E8A" => "X"
2851
+
2852
+ # Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
2853
+ "\u1E8C" => "X"
2854
+
2855
+ # Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
2856
+ "\u24CD" => "X"
2857
+
2858
+ # Ｘ [FULLWIDTH LATIN CAPITAL LETTER X]
2859
+ "\uFF38" => "X"
2860
+
2861
+ # ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
2862
+ "\u1D8D" => "x"
2863
+
2864
+ # ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
2865
+ "\u1E8B" => "x"
2866
+
2867
+ # ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
2868
+ "\u1E8D" => "x"
2869
+
2870
+ # ₓ [LATIN SUBSCRIPT SMALL LETTER X]
2871
+ "\u2093" => "x"
2872
+
2873
+ # ⓧ [CIRCLED LATIN SMALL LETTER X]
2874
+ "\u24E7" => "x"
2875
+
2876
+ # ｘ [FULLWIDTH LATIN SMALL LETTER X]
2877
+ "\uFF58" => "x"
2878
+
2879
+ # ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
2880
+ "\u24B3" => "(x)"
2881
+
2882
+ # Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
2883
+ "\u00DD" => "Y"
2884
+
2885
+ # Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
2886
+ "\u0176" => "Y"
2887
+
2888
+ # Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
2889
+ "\u0178" => "Y"
2890
+
2891
+ # Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
2892
+ "\u01B3" => "Y"
2893
+
2894
+ # Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
2895
+ "\u0232" => "Y"
2896
+
2897
+ # Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
2898
+ "\u024E" => "Y"
2899
+
2900
+ # ʏ [LATIN LETTER SMALL CAPITAL Y]
2901
+ "\u028F" => "Y"
2902
+
2903
+ # Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
2904
+ "\u1E8E" => "Y"
2905
+
2906
+ # Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
2907
+ "\u1EF2" => "Y"
2908
+
2909
+ # Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
2910
+ "\u1EF4" => "Y"
2911
+
2912
+ # Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
2913
+ "\u1EF6" => "Y"
2914
+
2915
+ # Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
2916
+ "\u1EF8" => "Y"
2917
+
2918
+ # Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
2919
+ "\u1EFE" => "Y"
2920
+
2921
+ # Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
2922
+ "\u24CE" => "Y"
2923
+
2924
+ # Ｙ [FULLWIDTH LATIN CAPITAL LETTER Y]
2925
+ "\uFF39" => "Y"
2926
+
2927
+ # ý [LATIN SMALL LETTER Y WITH ACUTE]
2928
+ "\u00FD" => "y"
2929
+
2930
+ # ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
2931
+ "\u00FF" => "y"
2932
+
2933
+ # ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
2934
+ "\u0177" => "y"
2935
+
2936
+ # ƴ [LATIN SMALL LETTER Y WITH HOOK]
2937
+ "\u01B4" => "y"
2938
+
2939
+ # ȳ [LATIN SMALL LETTER Y WITH MACRON]
2940
+ "\u0233" => "y"
2941
+
2942
+ # ɏ [LATIN SMALL LETTER Y WITH STROKE]
2943
+ "\u024F" => "y"
2944
+
2945
+ # ʎ [LATIN SMALL LETTER TURNED Y]
2946
+ "\u028E" => "y"
2947
+
2948
+ # ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
2949
+ "\u1E8F" => "y"
2950
+
2951
+ # ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
2952
+ "\u1E99" => "y"
2953
+
2954
+ # ỳ [LATIN SMALL LETTER Y WITH GRAVE]
2955
+ "\u1EF3" => "y"
2956
+
2957
+ # ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
2958
+ "\u1EF5" => "y"
2959
+
2960
+ # ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
2961
+ "\u1EF7" => "y"
2962
+
2963
+ # ỹ [LATIN SMALL LETTER Y WITH TILDE]
2964
+ "\u1EF9" => "y"
2965
+
2966
+ # ỿ [LATIN SMALL LETTER Y WITH LOOP]
2967
+ "\u1EFF" => "y"
2968
+
2969
+ # ⓨ [CIRCLED LATIN SMALL LETTER Y]
2970
+ "\u24E8" => "y"
2971
+
2972
+ # ｙ [FULLWIDTH LATIN SMALL LETTER Y]
2973
+ "\uFF59" => "y"
2974
+
2975
+ # ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
2976
+ "\u24B4" => "(y)"
2977
+
2978
+ # Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
2979
+ "\u0179" => "Z"
2980
+
2981
+ # Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
2982
+ "\u017B" => "Z"
2983
+
2984
+ # Ž [LATIN CAPITAL LETTER Z WITH CARON]
2985
+ "\u017D" => "Z"
2986
+
2987
+ # Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
2988
+ "\u01B5" => "Z"
2989
+
2990
+ # Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
2991
+ "\u021C" => "Z"
2992
+
2993
+ # Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
2994
+ "\u0224" => "Z"
2995
+
2996
+ # ᴢ [LATIN LETTER SMALL CAPITAL Z]
2997
+ "\u1D22" => "Z"
2998
+
2999
+ # Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
3000
+ "\u1E90" => "Z"
3001
+
3002
+ # Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
3003
+ "\u1E92" => "Z"
3004
+
3005
+ # Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
3006
+ "\u1E94" => "Z"
3007
+
3008
+ # Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
3009
+ "\u24CF" => "Z"
3010
+
3011
+ # Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
3012
+ "\u2C6B" => "Z"
3013
+
3014
+ # Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
3015
+ "\uA762" => "Z"
3016
+
3017
+ # Ｚ [FULLWIDTH LATIN CAPITAL LETTER Z]
3018
+ "\uFF3A" => "Z"
3019
+
3020
+ # ź [LATIN SMALL LETTER Z WITH ACUTE]
3021
+ "\u017A" => "z"
3022
+
3023
+ # ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
3024
+ "\u017C" => "z"
3025
+
3026
+ # ž [LATIN SMALL LETTER Z WITH CARON]
3027
+ "\u017E" => "z"
3028
+
3029
+ # ƶ [LATIN SMALL LETTER Z WITH STROKE]
3030
+ "\u01B6" => "z"
3031
+
3032
+ # ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
3033
+ "\u021D" => "z"
3034
+
3035
+ # ȥ [LATIN SMALL LETTER Z WITH HOOK]
3036
+ "\u0225" => "z"
3037
+
3038
+ # ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
3039
+ "\u0240" => "z"
3040
+
3041
+ # ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
3042
+ "\u0290" => "z"
3043
+
3044
+ # ʑ [LATIN SMALL LETTER Z WITH CURL]
3045
+ "\u0291" => "z"
3046
+
3047
+ # ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
3048
+ "\u1D76" => "z"
3049
+
3050
+ # ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
3051
+ "\u1D8E" => "z"
3052
+
3053
+ # ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
3054
+ "\u1E91" => "z"
3055
+
3056
+ # ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
3057
+ "\u1E93" => "z"
3058
+
3059
+ # ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
3060
+ "\u1E95" => "z"
3061
+
3062
+ # ⓩ [CIRCLED LATIN SMALL LETTER Z]
3063
+ "\u24E9" => "z"
3064
+
3065
+ # ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
3066
+ "\u2C6C" => "z"
3067
+
3068
+ # ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
3069
+ "\uA763" => "z"
3070
+
3071
+ # ｚ [FULLWIDTH LATIN SMALL LETTER Z]
3072
+ "\uFF5A" => "z"
3073
+
3074
+ # ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
3075
+ "\u24B5" => "(z)"
3076
+
3077
+ # ⁰ [SUPERSCRIPT ZERO]
3078
+ "\u2070" => "0"
3079
+
3080
+ # ₀ [SUBSCRIPT ZERO]
3081
+ "\u2080" => "0"
3082
+
3083
+ # ⓪ [CIRCLED DIGIT ZERO]
3084
+ "\u24EA" => "0"
3085
+
3086
+ # ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
3087
+ "\u24FF" => "0"
3088
+
3089
+ # ０ [FULLWIDTH DIGIT ZERO]
3090
+ "\uFF10" => "0"
3091
+
3092
+ # ¹ [SUPERSCRIPT ONE]
3093
+ "\u00B9" => "1"
3094
+
3095
+ # ₁ [SUBSCRIPT ONE]
3096
+ "\u2081" => "1"
3097
+
3098
+ # ① [CIRCLED DIGIT ONE]
3099
+ "\u2460" => "1"
3100
+
3101
+ # ⓵ [DOUBLE CIRCLED DIGIT ONE]
3102
+ "\u24F5" => "1"
3103
+
3104
+ # ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
3105
+ "\u2776" => "1"
3106
+
3107
+ # ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
3108
+ "\u2780" => "1"
3109
+
3110
+ # ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
3111
+ "\u278A" => "1"
3112
+
3113
+ # １ [FULLWIDTH DIGIT ONE]
3114
+ "\uFF11" => "1"
3115
+
3116
+ # ⒈ [DIGIT ONE FULL STOP]
3117
+ "\u2488" => "1."
3118
+
3119
+ # ⑴ [PARENTHESIZED DIGIT ONE]
3120
+ "\u2474" => "(1)"
3121
+
3122
+ # ² [SUPERSCRIPT TWO]
3123
+ "\u00B2" => "2"
3124
+
3125
+ # ₂ [SUBSCRIPT TWO]
3126
+ "\u2082" => "2"
3127
+
3128
+ # ② [CIRCLED DIGIT TWO]
3129
+ "\u2461" => "2"
3130
+
3131
+ # ⓶ [DOUBLE CIRCLED DIGIT TWO]
3132
+ "\u24F6" => "2"
3133
+
3134
+ # ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
3135
+ "\u2777" => "2"
3136
+
3137
+ # ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
3138
+ "\u2781" => "2"
3139
+
3140
+ # ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
3141
+ "\u278B" => "2"
3142
+
3143
+ # ２ [FULLWIDTH DIGIT TWO]
3144
+ "\uFF12" => "2"
3145
+
3146
+ # ⒉ [DIGIT TWO FULL STOP]
3147
+ "\u2489" => "2."
3148
+
3149
+ # ⑵ [PARENTHESIZED DIGIT TWO]
3150
+ "\u2475" => "(2)"
3151
+
3152
+ # ³ [SUPERSCRIPT THREE]
3153
+ "\u00B3" => "3"
3154
+
3155
+ # ₃ [SUBSCRIPT THREE]
3156
+ "\u2083" => "3"
3157
+
3158
+ # ③ [CIRCLED DIGIT THREE]
3159
+ "\u2462" => "3"
3160
+
3161
+ # ⓷ [DOUBLE CIRCLED DIGIT THREE]
3162
+ "\u24F7" => "3"
3163
+
3164
+ # ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
3165
+ "\u2778" => "3"
3166
+
3167
+ # ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
3168
+ "\u2782" => "3"
3169
+
3170
+ # ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
3171
+ "\u278C" => "3"
3172
+
3173
+ # ３ [FULLWIDTH DIGIT THREE]
3174
+ "\uFF13" => "3"
3175
+
3176
+ # ⒊ [DIGIT THREE FULL STOP]
3177
+ "\u248A" => "3."
3178
+
3179
+ # ⑶ [PARENTHESIZED DIGIT THREE]
3180
+ "\u2476" => "(3)"
3181
+
3182
+ # ⁴ [SUPERSCRIPT FOUR]
3183
+ "\u2074" => "4"
3184
+
3185
+ # ₄ [SUBSCRIPT FOUR]
3186
+ "\u2084" => "4"
3187
+
3188
+ # ④ [CIRCLED DIGIT FOUR]
3189
+ "\u2463" => "4"
3190
+
3191
+ # ⓸ [DOUBLE CIRCLED DIGIT FOUR]
3192
+ "\u24F8" => "4"
3193
+
3194
+ # ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
3195
+ "\u2779" => "4"
3196
+
3197
+ # ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
3198
+ "\u2783" => "4"
3199
+
3200
+ # ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
3201
+ "\u278D" => "4"
3202
+
3203
+ # ４ [FULLWIDTH DIGIT FOUR]
3204
+ "\uFF14" => "4"
3205
+
3206
+ # ⒋ [DIGIT FOUR FULL STOP]
3207
+ "\u248B" => "4."
3208
+
3209
+ # ⑷ [PARENTHESIZED DIGIT FOUR]
3210
+ "\u2477" => "(4)"
3211
+
3212
+ # ⁵ [SUPERSCRIPT FIVE]
3213
+ "\u2075" => "5"
3214
+
3215
+ # ₅ [SUBSCRIPT FIVE]
3216
+ "\u2085" => "5"
3217
+
3218
+ # ⑤ [CIRCLED DIGIT FIVE]
3219
+ "\u2464" => "5"
3220
+
3221
+ # ⓹ [DOUBLE CIRCLED DIGIT FIVE]
3222
+ "\u24F9" => "5"
3223
+
3224
+ # ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
3225
+ "\u277A" => "5"
3226
+
3227
+ # ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
3228
+ "\u2784" => "5"
3229
+
3230
+ # ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
3231
+ "\u278E" => "5"
3232
+
3233
+ # ５ [FULLWIDTH DIGIT FIVE]
3234
+ "\uFF15" => "5"
3235
+
3236
+ # ⒌ [DIGIT FIVE FULL STOP]
3237
+ "\u248C" => "5."
3238
+
3239
+ # ⑸ [PARENTHESIZED DIGIT FIVE]
3240
+ "\u2478" => "(5)"
3241
+
3242
+ # ⁶ [SUPERSCRIPT SIX]
3243
+ "\u2076" => "6"
3244
+
3245
+ # ₆ [SUBSCRIPT SIX]
3246
+ "\u2086" => "6"
3247
+
3248
+ # ⑥ [CIRCLED DIGIT SIX]
3249
+ "\u2465" => "6"
3250
+
3251
+ # ⓺ [DOUBLE CIRCLED DIGIT SIX]
3252
+ "\u24FA" => "6"
3253
+
3254
+ # ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
3255
+ "\u277B" => "6"
3256
+
3257
+ # ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
3258
+ "\u2785" => "6"
3259
+
3260
+ # ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
3261
+ "\u278F" => "6"
3262
+
3263
+ # ６ [FULLWIDTH DIGIT SIX]
3264
+ "\uFF16" => "6"
3265
+
3266
+ # ⒍ [DIGIT SIX FULL STOP]
3267
+ "\u248D" => "6."
3268
+
3269
+ # ⑹ [PARENTHESIZED DIGIT SIX]
3270
+ "\u2479" => "(6)"
3271
+
3272
+ # ⁷ [SUPERSCRIPT SEVEN]
3273
+ "\u2077" => "7"
3274
+
3275
+ # ₇ [SUBSCRIPT SEVEN]
3276
+ "\u2087" => "7"
3277
+
3278
+ # ⑦ [CIRCLED DIGIT SEVEN]
3279
+ "\u2466" => "7"
3280
+
3281
+ # ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
3282
+ "\u24FB" => "7"
3283
+
3284
+ # ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
3285
+ "\u277C" => "7"
3286
+
3287
+ # ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
3288
+ "\u2786" => "7"
3289
+
3290
+ # ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
3291
+ "\u2790" => "7"
3292
+
3293
+ # ７ [FULLWIDTH DIGIT SEVEN]
3294
+ "\uFF17" => "7"
3295
+
3296
+ # ⒎ [DIGIT SEVEN FULL STOP]
3297
+ "\u248E" => "7."
3298
+
3299
+ # ⑺ [PARENTHESIZED DIGIT SEVEN]
3300
+ "\u247A" => "(7)"
3301
+
3302
+ # ⁸ [SUPERSCRIPT EIGHT]
3303
+ "\u2078" => "8"
3304
+
3305
+ # ₈ [SUBSCRIPT EIGHT]
3306
+ "\u2088" => "8"
3307
+
3308
+ # ⑧ [CIRCLED DIGIT EIGHT]
3309
+ "\u2467" => "8"
3310
+
3311
+ # ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
3312
+ "\u24FC" => "8"
3313
+
3314
+ # ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
3315
+ "\u277D" => "8"
3316
+
3317
+ # ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
3318
+ "\u2787" => "8"
3319
+
3320
+ # ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
3321
+ "\u2791" => "8"
3322
+
3323
+ # ８ [FULLWIDTH DIGIT EIGHT]
3324
+ "\uFF18" => "8"
3325
+
3326
+ # ⒏ [DIGIT EIGHT FULL STOP]
3327
+ "\u248F" => "8."
3328
+
3329
+ # ⑻ [PARENTHESIZED DIGIT EIGHT]
3330
+ "\u247B" => "(8)"
3331
+
3332
+ # ⁹ [SUPERSCRIPT NINE]
3333
+ "\u2079" => "9"
3334
+
3335
+ # ₉ [SUBSCRIPT NINE]
3336
+ "\u2089" => "9"
3337
+
3338
+ # ⑨ [CIRCLED DIGIT NINE]
3339
+ "\u2468" => "9"
3340
+
3341
+ # ⓽ [DOUBLE CIRCLED DIGIT NINE]
3342
+ "\u24FD" => "9"
3343
+
3344
+ # ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
3345
+ "\u277E" => "9"
3346
+
3347
+ # ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
3348
+ "\u2788" => "9"
3349
+
3350
+ # ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
3351
+ "\u2792" => "9"
3352
+
3353
+ # ９ [FULLWIDTH DIGIT NINE]
3354
+ "\uFF19" => "9"
3355
+
3356
+ # ⒐ [DIGIT NINE FULL STOP]
3357
+ "\u2490" => "9."
3358
+
3359
+ # ⑼ [PARENTHESIZED DIGIT NINE]
3360
+ "\u247C" => "(9)"
3361
+
3362
+ # ⑩ [CIRCLED NUMBER TEN]
3363
+ "\u2469" => "10"
3364
+
3365
+ # ⓾ [DOUBLE CIRCLED NUMBER TEN]
3366
+ "\u24FE" => "10"
3367
+
3368
+ # ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
3369
+ "\u277F" => "10"
3370
+
3371
+ # ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
3372
+ "\u2789" => "10"
3373
+
3374
+ # ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
3375
+ "\u2793" => "10"
3376
+
3377
+ # ⒑ [NUMBER TEN FULL STOP]
3378
+ "\u2491" => "10."
3379
+
3380
+ # ⑽ [PARENTHESIZED NUMBER TEN]
3381
+ "\u247D" => "(10)"
3382
+
3383
+ # ⑪ [CIRCLED NUMBER ELEVEN]
3384
+ "\u246A" => "11"
3385
+
3386
+ # ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
3387
+ "\u24EB" => "11"
3388
+
3389
+ # ⒒ [NUMBER ELEVEN FULL STOP]
3390
+ "\u2492" => "11."
3391
+
3392
+ # ⑾ [PARENTHESIZED NUMBER ELEVEN]
3393
+ "\u247E" => "(11)"
3394
+
3395
+ # ⑫ [CIRCLED NUMBER TWELVE]
3396
+ "\u246B" => "12"
3397
+
3398
+ # ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
3399
+ "\u24EC" => "12"
3400
+
3401
+ # ⒓ [NUMBER TWELVE FULL STOP]
3402
+ "\u2493" => "12."
3403
+
3404
+ # ⑿ [PARENTHESIZED NUMBER TWELVE]
3405
+ "\u247F" => "(12)"
3406
+
3407
+ # ⑬ [CIRCLED NUMBER THIRTEEN]
3408
+ "\u246C" => "13"
3409
+
3410
+ # ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
3411
+ "\u24ED" => "13"
3412
+
3413
+ # ⒔ [NUMBER THIRTEEN FULL STOP]
3414
+ "\u2494" => "13."
3415
+
3416
+ # ⒀ [PARENTHESIZED NUMBER THIRTEEN]
3417
+ "\u2480" => "(13)"
3418
+
3419
+ # ⑭ [CIRCLED NUMBER FOURTEEN]
3420
+ "\u246D" => "14"
3421
+
3422
+ # ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
3423
+ "\u24EE" => "14"
3424
+
3425
+ # ⒕ [NUMBER FOURTEEN FULL STOP]
3426
+ "\u2495" => "14."
3427
+
3428
+ # ⒁ [PARENTHESIZED NUMBER FOURTEEN]
3429
+ "\u2481" => "(14)"
3430
+
3431
+ # ⑮ [CIRCLED NUMBER FIFTEEN]
3432
+ "\u246E" => "15"
3433
+
3434
+ # ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
3435
+ "\u24EF" => "15"
3436
+
3437
+ # ⒖ [NUMBER FIFTEEN FULL STOP]
3438
+ "\u2496" => "15."
3439
+
3440
+ # ⒂ [PARENTHESIZED NUMBER FIFTEEN]
3441
+ "\u2482" => "(15)"
3442
+
3443
+ # ⑯ [CIRCLED NUMBER SIXTEEN]
3444
+ "\u246F" => "16"
3445
+
3446
+ # ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
3447
+ "\u24F0" => "16"
3448
+
3449
+ # ⒗ [NUMBER SIXTEEN FULL STOP]
3450
+ "\u2497" => "16."
3451
+
3452
+ # ⒃ [PARENTHESIZED NUMBER SIXTEEN]
3453
+ "\u2483" => "(16)"
3454
+
3455
+ # ⑰ [CIRCLED NUMBER SEVENTEEN]
3456
+ "\u2470" => "17"
3457
+
3458
+ # ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
3459
+ "\u24F1" => "17"
3460
+
3461
+ # ⒘ [NUMBER SEVENTEEN FULL STOP]
3462
+ "\u2498" => "17."
3463
+
3464
+ # ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
3465
+ "\u2484" => "(17)"
3466
+
3467
+ # ⑱ [CIRCLED NUMBER EIGHTEEN]
3468
+ "\u2471" => "18"
3469
+
3470
+ # ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
3471
+ "\u24F2" => "18"
3472
+
3473
+ # ⒙ [NUMBER EIGHTEEN FULL STOP]
3474
+ "\u2499" => "18."
3475
+
3476
+ # ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
3477
+ "\u2485" => "(18)"
3478
+
3479
+ # ⑲ [CIRCLED NUMBER NINETEEN]
3480
+ "\u2472" => "19"
3481
+
3482
+ # ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
3483
+ "\u24F3" => "19"
3484
+
3485
+ # ⒚ [NUMBER NINETEEN FULL STOP]
3486
+ "\u249A" => "19."
3487
+
3488
+ # ⒆ [PARENTHESIZED NUMBER NINETEEN]
3489
+ "\u2486" => "(19)"
3490
+
3491
+ # ⑳ [CIRCLED NUMBER TWENTY]
3492
+ "\u2473" => "20"
3493
+
3494
+ # ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
3495
+ "\u24F4" => "20"
3496
+
3497
+ # ⒛ [NUMBER TWENTY FULL STOP]
3498
+ "\u249B" => "20."
3499
+
3500
+ # ⒇ [PARENTHESIZED NUMBER TWENTY]
3501
+ "\u2487" => "(20)"
3502
+
3503
+ # « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
3504
+ "\u00AB" => "\""
3505
+
3506
+ # » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
3507
+ "\u00BB" => "\""
3508
+
3509
+ # “ [LEFT DOUBLE QUOTATION MARK]
3510
+ "\u201C" => "\""
3511
+
3512
+ # ” [RIGHT DOUBLE QUOTATION MARK]
3513
+ "\u201D" => "\""
3514
+
3515
+ # „ [DOUBLE LOW-9 QUOTATION MARK]
3516
+ "\u201E" => "\""
3517
+
3518
+ # ″ [DOUBLE PRIME]
3519
+ "\u2033" => "\""
3520
+
3521
+ # ‶ [REVERSED DOUBLE PRIME]
3522
+ "\u2036" => "\""
3523
+
3524
+ # ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
3525
+ "\u275D" => "\""
3526
+
3527
+ # ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
3528
+ "\u275E" => "\""
3529
+
3530
+ # ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
3531
+ "\u276E" => "\""
3532
+
3533
+ # ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
3534
+ "\u276F" => "\""
3535
+
3536
+ # ＂ [FULLWIDTH QUOTATION MARK]
3537
+ "\uFF02" => "\""
3538
+
3539
+ # ‘ [LEFT SINGLE QUOTATION MARK]
3540
+ "\u2018" => "\'"
3541
+
3542
+ # ’ [RIGHT SINGLE QUOTATION MARK]
3543
+ "\u2019" => "\'"
3544
+
3545
+ # ‚ [SINGLE LOW-9 QUOTATION MARK]
3546
+ "\u201A" => "\'"
3547
+
3548
+ # ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
3549
+ "\u201B" => "\'"
3550
+
3551
+ # ′ [PRIME]
3552
+ "\u2032" => "\'"
3553
+
3554
+ # ‵ [REVERSED PRIME]
3555
+ "\u2035" => "\'"
3556
+
3557
+ # ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
3558
+ "\u2039" => "\'"
3559
+
3560
+ # › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
3561
+ "\u203A" => "\'"
3562
+
3563
+ # ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
3564
+ "\u275B" => "\'"
3565
+
3566
+ # ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
3567
+ "\u275C" => "\'"
3568
+
3569
+ # ＇ [FULLWIDTH APOSTROPHE]
3570
+ "\uFF07" => "\'"
3571
+
3572
+ # ‐ [HYPHEN]
3573
+ "\u2010" => "-"
3574
+
3575
+ # ‑ [NON-BREAKING HYPHEN]
3576
+ "\u2011" => "-"
3577
+
3578
+ # ‒ [FIGURE DASH]
3579
+ "\u2012" => "-"
3580
+
3581
+ # – [EN DASH]
3582
+ "\u2013" => "-"
3583
+
3584
+ # — [EM DASH]
3585
+ "\u2014" => "-"
3586
+
3587
+ # ⁻ [SUPERSCRIPT MINUS]
3588
+ "\u207B" => "-"
3589
+
3590
+ # ₋ [SUBSCRIPT MINUS]
3591
+ "\u208B" => "-"
3592
+
3593
+ # － [FULLWIDTH HYPHEN-MINUS]
3594
+ "\uFF0D" => "-"
3595
+
3596
+ # ⁅ [LEFT SQUARE BRACKET WITH QUILL]
3597
+ "\u2045" => "["
3598
+
3599
+ # ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
3600
+ "\u2772" => "["
3601
+
3602
+ # ［ [FULLWIDTH LEFT SQUARE BRACKET]
3603
+ "\uFF3B" => "["
3604
+
3605
+ # ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
3606
+ "\u2046" => "]"
3607
+
3608
+ # ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
3609
+ "\u2773" => "]"
3610
+
3611
+ # ］ [FULLWIDTH RIGHT SQUARE BRACKET]
3612
+ "\uFF3D" => "]"
3613
+
3614
+ # ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
3615
+ "\u207D" => "("
3616
+
3617
+ # ₍ [SUBSCRIPT LEFT PARENTHESIS]
3618
+ "\u208D" => "("
3619
+
3620
+ # ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
3621
+ "\u2768" => "("
3622
+
3623
+ # ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
3624
+ "\u276A" => "("
3625
+
3626
+ # （ [FULLWIDTH LEFT PARENTHESIS]
3627
+ "\uFF08" => "("
3628
+
3629
+ # ⸨ [LEFT DOUBLE PARENTHESIS]
3630
+ "\u2E28" => "(("
3631
+
3632
+ # ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
3633
+ "\u207E" => ")"
3634
+
3635
+ # ₎ [SUBSCRIPT RIGHT PARENTHESIS]
3636
+ "\u208E" => ")"
3637
+
3638
+ # ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
3639
+ "\u2769" => ")"
3640
+
3641
+ # ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
3642
+ "\u276B" => ")"
3643
+
3644
+ # ） [FULLWIDTH RIGHT PARENTHESIS]
3645
+ "\uFF09" => ")"
3646
+
3647
+ # ⸩ [RIGHT DOUBLE PARENTHESIS]
3648
+ "\u2E29" => "))"
3649
+
3650
+ # ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
3651
+ "\u276C" => "<"
3652
+
3653
+ # ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
3654
+ "\u2770" => "<"
3655
+
3656
+ # ＜ [FULLWIDTH LESS-THAN SIGN]
3657
+ "\uFF1C" => "<"
3658
+
3659
+ # ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
3660
+ "\u276D" => ">"
3661
+
3662
+ # ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
3663
+ "\u2771" => ">"
3664
+
3665
+ # ＞ [FULLWIDTH GREATER-THAN SIGN]
3666
+ "\uFF1E" => ">"
3667
+
3668
+ # ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
3669
+ "\u2774" => "{"
3670
+
3671
+ # ｛ [FULLWIDTH LEFT CURLY BRACKET]
3672
+ "\uFF5B" => "{"
3673
+
3674
+ # ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
3675
+ "\u2775" => "}"
3676
+
3677
+ # ｝ [FULLWIDTH RIGHT CURLY BRACKET]
3678
+ "\uFF5D" => "}"
3679
+
3680
+ # ⁺ [SUPERSCRIPT PLUS SIGN]
3681
+ "\u207A" => "+"
3682
+
3683
+ # ₊ [SUBSCRIPT PLUS SIGN]
3684
+ "\u208A" => "+"
3685
+
3686
+ # ＋ [FULLWIDTH PLUS SIGN]
3687
+ "\uFF0B" => "+"
3688
+
3689
+ # ⁼ [SUPERSCRIPT EQUALS SIGN]
3690
+ "\u207C" => "="
3691
+
3692
+ # ₌ [SUBSCRIPT EQUALS SIGN]
3693
+ "\u208C" => "="
3694
+
3695
+ # ＝ [FULLWIDTH EQUALS SIGN]
3696
+ "\uFF1D" => "="
3697
+
3698
+ # ！ [FULLWIDTH EXCLAMATION MARK]
3699
+ "\uFF01" => "!"
3700
+
3701
+ # ‼ [DOUBLE EXCLAMATION MARK]
3702
+ "\u203C" => "!!"
3703
+
3704
+ # ⁉ [EXCLAMATION QUESTION MARK]
3705
+ "\u2049" => "!?"
3706
+
3707
+ # ＃ [FULLWIDTH NUMBER SIGN]
3708
+ "\uFF03" => "#"
3709
+
3710
+ # ＄ [FULLWIDTH DOLLAR SIGN]
3711
+ "\uFF04" => "$"
3712
+
3713
+ # ⁒ [COMMERCIAL MINUS SIGN]
3714
+ "\u2052" => "%"
3715
+
3716
+ # ％ [FULLWIDTH PERCENT SIGN]
3717
+ "\uFF05" => "%"
3718
+
3719
+ # ＆ [FULLWIDTH AMPERSAND]
3720
+ "\uFF06" => "&"
3721
+
3722
+ # ⁎ [LOW ASTERISK]
3723
+ "\u204E" => "*"
3724
+
3725
+ # ＊ [FULLWIDTH ASTERISK]
3726
+ "\uFF0A" => "*"
3727
+
3728
+ # ， [FULLWIDTH COMMA]
3729
+ "\uFF0C" => ","
3730
+
3731
+ # ． [FULLWIDTH FULL STOP]
3732
+ "\uFF0E" => "."
3733
+
3734
+ # ⁄ [FRACTION SLASH]
3735
+ "\u2044" => "/"
3736
+
3737
+ # ／ [FULLWIDTH SOLIDUS]
3738
+ "\uFF0F" => "/"
3739
+
3740
+ # ： [FULLWIDTH COLON]
3741
+ "\uFF1A" => ":"
3742
+
3743
+ # ⁏ [REVERSED SEMICOLON]
3744
+ "\u204F" => ";"
3745
+
3746
+ # ； [FULLWIDTH SEMICOLON]
3747
+ "\uFF1B" => ";"
3748
+
3749
+ # ？ [FULLWIDTH QUESTION MARK]
3750
+ "\uFF1F" => "?"
3751
+
3752
+ # ⁇ [DOUBLE QUESTION MARK]
3753
+ "\u2047" => "??"
3754
+
3755
+ # ⁈ [QUESTION EXCLAMATION MARK]
3756
+ "\u2048" => "?!"
3757
+
3758
+ # ＠ [FULLWIDTH COMMERCIAL AT]
3759
+ "\uFF20" => "@"
3760
+
3761
+ # ＼ [FULLWIDTH REVERSE SOLIDUS]
3762
+ "\uFF3C" => "\\"
3763
+
3764
+ # ‸ [CARET]
3765
+ "\u2038" => "^"
3766
+
3767
+ # ＾ [FULLWIDTH CIRCUMFLEX ACCENT]
3768
+ "\uFF3E" => "^"
3769
+
3770
+ # ＿ [FULLWIDTH LOW LINE]
3771
+ "\uFF3F" => "_"
3772
+
3773
+ # ⁓ [SWUNG DASH]
3774
+ "\u2053" => "~"
3775
+
3776
+ # ～ [FULLWIDTH TILDE]
3777
+ "\uFF5E" => "~"
3778
+
3779
+ ################################################################
3780
+ # Below is the Perl script used to generate the above mappings #
3781
+ # from ASCIIFoldingFilter.java: #
3782
+ ################################################################
3783
+ #
3784
+ # #!/usr/bin/perl
3785
+ #
3786
+ # use warnings;
3787
+ # use strict;
3788
+ #
3789
+ # my @source_chars = ();
3790
+ # my @source_char_descriptions = ();
3791
+ # my $target = '';
3792
+ #
3793
+ # while (<>) {
3794
+ # if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) {
3795
+ # push @source_chars, $1;
3796
+ # push @source_char_descriptions, $2;
3797
+ # next;
3798
+ # }
3799
+ # if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) {
3800
+ # $target .= $1;
3801
+ # next;
3802
+ # }
3803
+ # if (/break;/) {
3804
+ # $target = "\\\"" if ($target eq '"');
3805
+ # for my $source_char_num (0..$#source_chars) {
3806
+ # print "# $source_char_descriptions[$source_char_num]\n";
3807
+ # print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n";
3808
+ # }
3809
+ # @source_chars = ();
3810
+ # @source_char_descriptions = ();
3811
+ # $target = '';
3812
+ # }
3813
+ # }