smartmachine 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (229) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.rdoc +0 -0
  3. data/README.rdoc +87 -0
  4. data/bin/buildpacker +8 -0
  5. data/bin/prereceiver +8 -0
  6. data/bin/smartmachine +81 -0
  7. data/bin/smartrunner +28 -0
  8. data/lib/smart_machine.rb +24 -0
  9. data/lib/smart_machine/apps.rb +14 -0
  10. data/lib/smart_machine/apps/app.rb +182 -0
  11. data/lib/smart_machine/apps/rails.rb +250 -0
  12. data/lib/smart_machine/base.rb +11 -0
  13. data/lib/smart_machine/boot.rb +31 -0
  14. data/lib/smart_machine/buildpacker.rb +106 -0
  15. data/lib/smart_machine/credentials.rb +126 -0
  16. data/lib/smart_machine/docker.rb +144 -0
  17. data/lib/smart_machine/engine.rb +82 -0
  18. data/lib/smart_machine/engine/Dockerfile +27 -0
  19. data/lib/smart_machine/engine/buildpacks/rails/Dockerfile +21 -0
  20. data/lib/smart_machine/gem_version.rb +17 -0
  21. data/lib/smart_machine/grids.rb +15 -0
  22. data/lib/smart_machine/grids/elasticsearch.rb +97 -0
  23. data/lib/smart_machine/grids/elasticsearch/.keep +0 -0
  24. data/lib/smart_machine/grids/minio.rb +77 -0
  25. data/lib/smart_machine/grids/minio/.keep +0 -0
  26. data/lib/smart_machine/grids/mysql.rb +66 -0
  27. data/lib/smart_machine/grids/mysql/docker-entrypoint-initdb.d/.keep +0 -0
  28. data/lib/smart_machine/grids/nginx.rb +137 -0
  29. data/lib/smart_machine/grids/nginx/.keep +0 -0
  30. data/lib/smart_machine/grids/prereceiver.rb +170 -0
  31. data/lib/smart_machine/grids/prereceiver/Dockerfile +20 -0
  32. data/lib/smart_machine/grids/prereceiver/fcgiwrap/APKBUILD +49 -0
  33. data/lib/smart_machine/grids/prereceiver/fcgiwrap/fcgiwrap.confd +6 -0
  34. data/lib/smart_machine/grids/prereceiver/fcgiwrap/fcgiwrap.initd +43 -0
  35. data/lib/smart_machine/grids/prereceiver/fcgiwrap/fcgiwrap.pre-install +7 -0
  36. data/lib/smart_machine/grids/prereceiver/fcgiwrap/packages/main/x86_64/APKINDEX.tar.gz +0 -0
  37. data/lib/smart_machine/grids/prereceiver/fcgiwrap/packages/main/x86_64/fcgiwrap-1.1.1-r4.apk +0 -0
  38. data/lib/smart_machine/grids/prereceiver/fcgiwrap/packages/main/x86_64/fcgiwrap-doc-1.1.1-r4.apk +0 -0
  39. data/lib/smart_machine/grids/prereceiver/fcgiwrap/packages/main/x86_64/fcgiwrap-openrc-1.1.1-r4.apk +0 -0
  40. data/lib/smart_machine/grids/redis.rb +58 -0
  41. data/lib/smart_machine/grids/redis/.keep +0 -0
  42. data/lib/smart_machine/grids/solr.rb +99 -0
  43. data/lib/smart_machine/grids/solr/config/.keep +0 -0
  44. data/lib/smart_machine/grids/solr/config/README.txt +77 -0
  45. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/contractions_ca.txt +8 -0
  46. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/contractions_fr.txt +15 -0
  47. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/contractions_ga.txt +5 -0
  48. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/contractions_it.txt +23 -0
  49. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/hyphenations_ga.txt +5 -0
  50. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stemdict_nl.txt +6 -0
  51. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stoptags_ja.txt +420 -0
  52. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_ar.txt +125 -0
  53. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_bg.txt +193 -0
  54. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_ca.txt +220 -0
  55. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_cz.txt +172 -0
  56. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_da.txt +110 -0
  57. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_de.txt +294 -0
  58. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_el.txt +78 -0
  59. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_en.txt +54 -0
  60. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_es.txt +356 -0
  61. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_eu.txt +99 -0
  62. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_fa.txt +313 -0
  63. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_fi.txt +97 -0
  64. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_fr.txt +186 -0
  65. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_ga.txt +110 -0
  66. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_gl.txt +161 -0
  67. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_hi.txt +235 -0
  68. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_hu.txt +211 -0
  69. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_hy.txt +46 -0
  70. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_id.txt +359 -0
  71. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_it.txt +303 -0
  72. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_ja.txt +127 -0
  73. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_lv.txt +172 -0
  74. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_nl.txt +119 -0
  75. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_no.txt +194 -0
  76. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_pt.txt +253 -0
  77. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_ro.txt +233 -0
  78. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_ru.txt +243 -0
  79. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_sv.txt +133 -0
  80. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_th.txt +119 -0
  81. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/stopwords_tr.txt +212 -0
  82. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/lang/userdict_ja.txt +29 -0
  83. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/managed-schema +1007 -0
  84. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/params.json +20 -0
  85. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/protwords.txt +21 -0
  86. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/solrconfig.xml +1355 -0
  87. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/stopwords.txt +14 -0
  88. data/lib/smart_machine/grids/solr/config/configsets/_default/conf/synonyms.txt +29 -0
  89. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/_rest_managed.json +1 -0
  90. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/_schema_analysis_stopwords_english.json +38 -0
  91. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/_schema_analysis_synonyms_english.json +11 -0
  92. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/clustering/carrot2/README.txt +11 -0
  93. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/clustering/carrot2/kmeans-attributes.xml +19 -0
  94. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/clustering/carrot2/lingo-attributes.xml +24 -0
  95. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/clustering/carrot2/stc-attributes.xml +19 -0
  96. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/currency.xml +67 -0
  97. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/elevate.xml +42 -0
  98. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/contractions_ca.txt +8 -0
  99. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/contractions_fr.txt +15 -0
  100. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/contractions_ga.txt +5 -0
  101. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/contractions_it.txt +23 -0
  102. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/hyphenations_ga.txt +5 -0
  103. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stemdict_nl.txt +6 -0
  104. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stoptags_ja.txt +420 -0
  105. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_ar.txt +125 -0
  106. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_bg.txt +193 -0
  107. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_ca.txt +220 -0
  108. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_ckb.txt +136 -0
  109. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_cz.txt +172 -0
  110. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_da.txt +110 -0
  111. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_de.txt +294 -0
  112. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_el.txt +78 -0
  113. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_en.txt +54 -0
  114. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_es.txt +356 -0
  115. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_eu.txt +99 -0
  116. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_fa.txt +313 -0
  117. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_fi.txt +97 -0
  118. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_fr.txt +186 -0
  119. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_ga.txt +110 -0
  120. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_gl.txt +161 -0
  121. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_hi.txt +235 -0
  122. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_hu.txt +211 -0
  123. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_hy.txt +46 -0
  124. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_id.txt +359 -0
  125. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_it.txt +303 -0
  126. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_ja.txt +127 -0
  127. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_lv.txt +172 -0
  128. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_nl.txt +119 -0
  129. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_no.txt +194 -0
  130. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_pt.txt +253 -0
  131. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_ro.txt +233 -0
  132. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_ru.txt +243 -0
  133. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_sv.txt +133 -0
  134. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_th.txt +119 -0
  135. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/stopwords_tr.txt +212 -0
  136. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/lang/userdict_ja.txt +29 -0
  137. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/managed-schema +1187 -0
  138. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/mapping-FoldToASCII.txt +3813 -0
  139. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/mapping-ISOLatin1Accent.txt +246 -0
  140. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/params.json +11 -0
  141. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/protwords.txt +21 -0
  142. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/solrconfig.xml +1616 -0
  143. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/spellings.txt +2 -0
  144. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/stopwords.txt +14 -0
  145. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/synonyms.txt +29 -0
  146. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/update-script.js +53 -0
  147. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/README.txt +101 -0
  148. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/VM_global_library.vm +186 -0
  149. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/browse.vm +33 -0
  150. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/cluster.vm +19 -0
  151. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/cluster_results.vm +31 -0
  152. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/debug.vm +28 -0
  153. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/did_you_mean.vm +11 -0
  154. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/error.vm +11 -0
  155. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/facet_fields.vm +24 -0
  156. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/facet_pivot.vm +12 -0
  157. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/facet_queries.vm +12 -0
  158. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/facet_ranges.vm +23 -0
  159. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/facets.vm +10 -0
  160. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/footer.vm +43 -0
  161. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/head.vm +37 -0
  162. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/header.vm +7 -0
  163. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/hit.vm +25 -0
  164. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/hit_grouped.vm +43 -0
  165. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/hit_plain.vm +25 -0
  166. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/join_doc.vm +20 -0
  167. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/jquery.autocomplete.css +48 -0
  168. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/jquery.autocomplete.js +763 -0
  169. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/layout.vm +24 -0
  170. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/main.css +231 -0
  171. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/mime_type_lists.vm +68 -0
  172. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/pagination_bottom.vm +22 -0
  173. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/pagination_top.vm +29 -0
  174. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/product_doc.vm +32 -0
  175. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/query.vm +42 -0
  176. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/query_form.vm +64 -0
  177. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/query_group.vm +43 -0
  178. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/query_spatial.vm +75 -0
  179. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/results_list.vm +22 -0
  180. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/richtext_doc.vm +153 -0
  181. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/suggest.vm +8 -0
  182. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/velocity/tabs.vm +50 -0
  183. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/xslt/example.xsl +132 -0
  184. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/xslt/example_atom.xsl +67 -0
  185. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/xslt/example_rss.xsl +66 -0
  186. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/xslt/luke.xsl +337 -0
  187. data/lib/smart_machine/grids/solr/config/configsets/sample_techproducts_configs/conf/xslt/updateXml.xsl +70 -0
  188. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/_rest_managed.json +1 -0
  189. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/admin-extra.html +31 -0
  190. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/currency.xml +67 -0
  191. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/elevate.xml +36 -0
  192. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/lang/stopwords_en.txt +54 -0
  193. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/mapping-ISOLatin1Accent.txt +246 -0
  194. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/protwords.txt +21 -0
  195. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/schema.xml +278 -0
  196. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/scripts.conf +24 -0
  197. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/solrconfig.xml +632 -0
  198. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/spellings.txt +2 -0
  199. data/lib/smart_machine/grids/solr/config/configsets/sunspot/conf/synonyms.txt +29 -0
  200. data/lib/smart_machine/grids/solr/config/lib/.keep +0 -0
  201. data/lib/smart_machine/grids/solr/config/solr.xml +53 -0
  202. data/lib/smart_machine/grids/solr/config/zoo.cfg +31 -0
  203. data/lib/smart_machine/grids/solr/docker-entrypoint-initdb.d/.keep +0 -0
  204. data/lib/smart_machine/grids/solr/sunspot/conf/schema.xml +278 -0
  205. data/lib/smart_machine/grids/solr/sunspot/conf/solrconfig.xml +632 -0
  206. data/lib/smart_machine/logger.rb +35 -0
  207. data/lib/smart_machine/machine.rb +192 -0
  208. data/lib/smart_machine/ssh.rb +43 -0
  209. data/lib/smart_machine/sync.rb +108 -0
  210. data/lib/smart_machine/templates/dotsmartmachine/apps/containers/.keep +0 -0
  211. data/lib/smart_machine/templates/dotsmartmachine/apps/repositories/.keep +0 -0
  212. data/lib/smart_machine/templates/dotsmartmachine/config/environment.rb +18 -0
  213. data/lib/smart_machine/templates/dotsmartmachine/config/users.yml +4 -0
  214. data/lib/smart_machine/templates/dotsmartmachine/grids/elasticsearch/data/.keep +0 -0
  215. data/lib/smart_machine/templates/dotsmartmachine/grids/elasticsearch/logs/.keep +0 -0
  216. data/lib/smart_machine/templates/dotsmartmachine/grids/minio/data/.keep +0 -0
  217. data/lib/smart_machine/templates/dotsmartmachine/grids/mysql/data/.keep +0 -0
  218. data/lib/smart_machine/templates/dotsmartmachine/grids/nginx/certificates/.keep +0 -0
  219. data/lib/smart_machine/templates/dotsmartmachine/grids/nginx/fastcgi.conf +11 -0
  220. data/lib/smart_machine/templates/dotsmartmachine/grids/nginx/htpasswd/.keep +0 -0
  221. data/lib/smart_machine/templates/dotsmartmachine/grids/nginx/nginx.tmpl +373 -0
  222. data/lib/smart_machine/templates/dotsmartmachine/grids/prereceiver/pre-receive +17 -0
  223. data/lib/smart_machine/templates/dotsmartmachine/grids/redis/data/.keep +0 -0
  224. data/lib/smart_machine/templates/dotsmartmachine/grids/solr/solr/.keep +0 -0
  225. data/lib/smart_machine/templates/dotsmartmachine/tmp/.keep +0 -0
  226. data/lib/smart_machine/user.rb +38 -0
  227. data/lib/smart_machine/version.rb +10 -0
  228. metadata +297 -12
  229. data/README.md +0 -9
@@ -0,0 +1,212 @@
1
+ # Turkish stopwords from LUCENE-559
2
+ # merged with the list from "Information Retrieval on Turkish Texts"
3
+ # (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
4
+ acaba
5
+ altmış
6
+ altı
7
+ ama
8
+ ancak
9
+ arada
10
+ aslında
11
+ ayrıca
12
+ bana
13
+ bazı
14
+ belki
15
+ ben
16
+ benden
17
+ beni
18
+ benim
19
+ beri
20
+ beş
21
+ bile
22
+ bin
23
+ bir
24
+ birçok
25
+ biri
26
+ birkaç
27
+ birkez
28
+ birşey
29
+ birşeyi
30
+ biz
31
+ bize
32
+ bizden
33
+ bizi
34
+ bizim
35
+ böyle
36
+ böylece
37
+ bu
38
+ buna
39
+ bunda
40
+ bundan
41
+ bunlar
42
+ bunları
43
+ bunların
44
+ bunu
45
+ bunun
46
+ burada
47
+ çok
48
+ çünkü
49
+ da
50
+ daha
51
+ dahi
52
+ de
53
+ defa
54
+ değil
55
+ diğer
56
+ diye
57
+ doksan
58
+ dokuz
59
+ dolayı
60
+ dolayısıyla
61
+ dört
62
+ edecek
63
+ eden
64
+ ederek
65
+ edilecek
66
+ ediliyor
67
+ edilmesi
68
+ ediyor
69
+ eğer
70
+ elli
71
+ en
72
+ etmesi
73
+ etti
74
+ ettiği
75
+ ettiğini
76
+ gibi
77
+ göre
78
+ halen
79
+ hangi
80
+ hatta
81
+ hem
82
+ henüz
83
+ hep
84
+ hepsi
85
+ her
86
+ herhangi
87
+ herkesin
88
+ hiç
89
+ hiçbir
90
+ için
91
+ iki
92
+ ile
93
+ ilgili
94
+ ise
95
+ işte
96
+ itibaren
97
+ itibariyle
98
+ kadar
99
+ karşın
100
+ katrilyon
101
+ kendi
102
+ kendilerine
103
+ kendini
104
+ kendisi
105
+ kendisine
106
+ kendisini
107
+ kez
108
+ ki
109
+ kim
110
+ kimden
111
+ kime
112
+ kimi
113
+ kimse
114
+ kırk
115
+ milyar
116
+ milyon
117
+ mu
118
+
119
+
120
+ nasıl
121
+ ne
122
+ neden
123
+ nedenle
124
+ nerde
125
+ nerede
126
+ nereye
127
+ niye
128
+ niçin
129
+ o
130
+ olan
131
+ olarak
132
+ oldu
133
+ olduğu
134
+ olduğunu
135
+ olduklarını
136
+ olmadı
137
+ olmadığı
138
+ olmak
139
+ olması
140
+ olmayan
141
+ olmaz
142
+ olsa
143
+ olsun
144
+ olup
145
+ olur
146
+ olursa
147
+ oluyor
148
+ on
149
+ ona
150
+ ondan
151
+ onlar
152
+ onlardan
153
+ onları
154
+ onların
155
+ onu
156
+ onun
157
+ otuz
158
+ oysa
159
+ öyle
160
+ pek
161
+ rağmen
162
+ sadece
163
+ sanki
164
+ sekiz
165
+ seksen
166
+ sen
167
+ senden
168
+ seni
169
+ senin
170
+ siz
171
+ sizden
172
+ sizi
173
+ sizin
174
+ şey
175
+ şeyden
176
+ şeyi
177
+ şeyler
178
+ şöyle
179
+ şu
180
+ şuna
181
+ şunda
182
+ şundan
183
+ şunları
184
+ şunu
185
+ tarafından
186
+ trilyon
187
+ tüm
188
+ üç
189
+ üzere
190
+ var
191
+ vardı
192
+ ve
193
+ veya
194
+ ya
195
+ yani
196
+ yapacak
197
+ yapılan
198
+ yapılması
199
+ yapıyor
200
+ yapmak
201
+ yaptı
202
+ yaptığı
203
+ yaptığını
204
+ yaptıkları
205
+ yedi
206
+ yerine
207
+ yetmiş
208
+ yine
209
+ yirmi
210
+ yoksa
211
+ yüz
212
+ zaten
@@ -0,0 +1,29 @@
1
+ #
2
+ # This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
3
+ #
4
+ # Add entries to this file in order to override the statistical model in terms
5
+ # of segmentation, readings and part-of-speech tags. Notice that entries do
6
+ # not have weights since they are always used when found. This is by design
7
+ # in order to maximize ease-of-use.
8
+ #
9
+ # Entries are defined using the following CSV format:
10
+ # <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
11
+ #
12
+ # Notice that a single half-width space separates tokens and readings, and
13
+ # that the number of tokens and readings must match exactly.
14
+ #
15
+ # Also notice that the behavior of multiple entries with the same <text> is undefined.
16
+ #
17
+ # Whitespace only lines are ignored. Comments are not allowed on entry lines.
18
+ #
19
+
20
+ # Custom segmentation for kanji compounds
21
+ 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
22
+ 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
23
+
24
+ # Custom segmentation for compound katakana
25
+ トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
26
+ ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
27
+
28
+ # Custom reading for former sumo wrestler
29
+ 朝青龍,朝青龍,アサショウリュウ,カスタム人名
@@ -0,0 +1,1187 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!--
20
+ This is the Solr schema file. This file should be named "schema.xml" and
21
+ should be in the conf directory under the solr home
22
+ (i.e. ./solr/conf/schema.xml by default)
23
+ or located where the classloader for the Solr webapp can find it.
24
+
25
+ This example schema is the recommended starting point for users.
26
+ It should be kept correct and concise, usable out-of-the-box.
27
+
28
+ For more information, on how to customize this file, please see
29
+ http://wiki.apache.org/solr/SchemaXml
30
+
31
+ PERFORMANCE NOTE: this schema includes many optional features and should not
32
+ be used for benchmarking. To improve performance one could
33
+ - set stored="false" for all fields possible (esp large fields) when you
34
+ only need to search on the field but don't need to return the original
35
+ value.
36
+ - set indexed="false" if you don't need to search on the field, but only
37
+ return the field as a result of searching on other indexed fields.
38
+ - remove all unneeded copyField statements
39
+ - for best index size and searching performance, set "indexed" to false
40
+ for all general text fields, use copyField to copy them to the
41
+ catchall "text" field, and use that for searching.
42
+ - For maximum indexing performance, use the ConcurrentUpdateSolrServer
43
+ java client.
44
+ - Remember to run the JVM in server mode, and use a higher logging level
45
+ that avoids logging every request
46
+ -->
47
+
48
+ <schema name="example" version="1.6">
49
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
50
+ version="x.y" is Solr's version number for the schema syntax and
51
+ semantics. It should not normally be changed by applications.
52
+
53
+ 1.0: multiValued attribute did not exist, all fields are multiValued
54
+ by nature
55
+ 1.1: multiValued attribute introduced, false by default
56
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default
57
+ except for text fields.
58
+ 1.3: removed optional field compress feature
59
+ 1.4: autoGeneratePhraseQueries attribute introduced to drive QueryParser
60
+ behavior when a single string produces multiple tokens. Defaults
61
+ to off for version >= 1.4
62
+ 1.5: omitNorms defaults to true for primitive field types
63
+ (int, float, boolean, string...)
64
+ 1.6: useDocValuesAsStored defaults to true.
65
+ -->
66
+
67
+
68
+ <!-- Valid attributes for fields:
69
+ name: mandatory - the name for the field
70
+ type: mandatory - the name of a field type from the
71
+ fieldTypes
72
+ indexed: true if this field should be indexed (searchable or sortable)
73
+ stored: true if this field should be retrievable
74
+ docValues: true if this field should have doc values. Doc values are
75
+ useful (required, if you are using *Point fields) for faceting,
76
+ grouping, sorting and function queries. Doc values will make the index
77
+ faster to load, more NRT-friendly and more memory-efficient.
78
+ They however come with some limitations: they are currently only
79
+ supported by StrField, UUIDField, all *PointFields, and depending
80
+ on the field type, they might require the field to be single-valued,
81
+ be required or have a default value (check the documentation
82
+ of the field type you're interested in for more information)
83
+ multiValued: true if this field may contain multiple values per document
84
+ omitNorms: (expert) set to true to omit the norms associated with
85
+ this field (this disables length normalization and index-time
86
+ boosting for the field, and saves some memory). Only full-text
87
+ fields or fields that need an index-time boost need norms.
88
+ Norms are omitted for primitive (non-analyzed) types by default.
89
+ termVectors: [false] set to true to store the term vector for a
90
+ given field.
91
+ When using MoreLikeThis, fields used for similarity should be
92
+ stored for best performance.
93
+ termPositions: Store position information with the term vector.
94
+ This will increase storage costs.
95
+ termOffsets: Store offset information with the term vector. This
96
+ will increase storage costs.
97
+ termPayloads: Store payload information with the term vector. This
98
+ will increase storage costs.
99
+ required: The field is required. It will throw an error if the
100
+ value does not exist
101
+ default: a value that should be used if no value is specified
102
+ when adding a document.
103
+ -->
104
+
105
+ <!-- field names should consist of alphanumeric or underscore characters only and
106
+ not start with a digit. This is not currently strictly enforced,
107
+ but other field names will not have first class support from all components
108
+ and back compatibility is not guaranteed. Names with both leading and
109
+ trailing underscores (e.g. _version_) are reserved.
110
+ -->
111
+
112
+ <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
113
+ or Solr won't start. _version_ and update log are required for SolrCloud
114
+ -->
115
+ <!-- doc values are enabled by default for primitive types such as long so we don't index the version field -->
116
+ <field name="_version_" type="plong" indexed="false" stored="false"/>
117
+
118
+ <!-- points to the root document of a block of nested documents. Required for nested
119
+ document support, may be removed otherwise
120
+ -->
121
+ <field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
122
+
123
+ <!-- Only remove the "id" field if you have a very good reason to. While not strictly
124
+ required, it is highly recommended. A <uniqueKey> is present in almost all Solr
125
+ installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
126
+ Do NOT change the type and apply index-time analysis to the <uniqueKey> as it will likely
127
+ make routing in SolrCloud and document replacement in general fail. Limited _query_ time
128
+ analysis is possible as long as the indexing process is guaranteed to index the term
129
+ in a compatible way. Any analysis applied to the <uniqueKey> should _not_ produce multiple
130
+ tokens
131
+ -->
132
+ <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
133
+
134
+ <field name="pre" type="preanalyzed" indexed="true" stored="true"/>
135
+ <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
136
+ <field name="name" type="text_general" indexed="true" stored="true"/>
137
+ <field name="manu" type="text_gen_sort" indexed="true" stored="true" omitNorms="true" multiValued="false"/>
138
+ <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
139
+ <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
140
+ <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
141
+
142
+ <field name="weight" type="pfloat" indexed="true" stored="true"/>
143
+ <field name="price" type="pfloat" indexed="true" stored="true"/>
144
+ <field name="popularity" type="pint" indexed="true" stored="true" />
145
+ <field name="inStock" type="boolean" indexed="true" stored="true" />
146
+
147
+ <field name="store" type="location" indexed="true" stored="true"/>
148
+
149
+ <!-- Common metadata fields, named specifically to match up with
150
+ SolrCell metadata when parsing rich documents such as Word, PDF.
151
+ Some fields are multiValued only because Tika currently may return
152
+ multiple values for them. Some metadata is parsed from the documents,
153
+ but there are some which come from the client context:
154
+ "content_type": From the HTTP headers of incoming stream
155
+ "resourcename": From SolrCell request param resource.name
156
+ -->
157
+ <field name="title" type="text_gen_sort" indexed="true" stored="true" multiValued="true"/>
158
+ <field name="subject" type="text_gen_sort" indexed="true" stored="true" multiValued="false"/>
159
+ <field name="description" type="text_general" indexed="true" stored="true"/>
160
+ <field name="comments" type="text_general" indexed="true" stored="true"/>
161
+ <field name="author" type="text_gen_sort" indexed="true" stored="true" multiValued="false"/>
162
+ <field name="keywords" type="text_general" indexed="true" stored="true"/>
163
+ <field name="category" type="text_general" indexed="true" stored="true"/>
164
+ <field name="resourcename" type="text_general" indexed="true" stored="true"/>
165
+ <field name="url" type="text_general" indexed="true" stored="true"/>
166
+ <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
167
+ <field name="last_modified" type="pdate" indexed="true" stored="true"/>
168
+ <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
169
+ <field name="_src_" type="string" indexed="false" stored="true"/>
170
+
171
+ <!-- Main body of document extracted by SolrCell.
172
+ NOTE: This field is not indexed by default, since it is also copied to "text"
173
+ using copyField below. This is to save space. Use this field for returning and
174
+ highlighting document content. Use the "text" field to search the content. -->
175
+ <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
176
+
177
+
178
+ <!-- catchall field, containing all other searchable text fields (implemented
179
+ via copyField further on in this schema) -->
180
+ <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
181
+
182
+ <!-- catchall text field that indexes tokens both normally and in reverse for efficient
183
+ leading wildcard queries. -->
184
+ <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
185
+
186
+ <!-- non-tokenized version of manufacturer to make it easier to sort or group
187
+ results by manufacturer. copied from "manu" via copyField -->
188
+ <field name="manu_exact" type="string" indexed="true" stored="false" docValues="false" />
189
+
190
+ <field name="payloads" type="payloads" indexed="true" stored="true"/>
191
+
192
+
193
+ <!-- Dynamic field definitions allow using convention over configuration
194
+ for fields via the specification of patterns to match field names.
195
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
196
+ RESTRICTION: the glob-like pattern in the name attribute must have
197
+ a "*" only at the start or the end. -->
198
+
199
+ <dynamicField name="*_i" type="pint" indexed="true" stored="true"/>
200
+ <dynamicField name="*_is" type="pint" indexed="true" stored="true" multiValued="true"/>
201
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" />
202
+ <dynamicField name="*_s_ns" type="string" indexed="true" stored="false" />
203
+ <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
204
+ <dynamicField name="*_l" type="plong" indexed="true" stored="true"/>
205
+ <dynamicField name="*_l_ns" type="plong" indexed="true" stored="false"/>
206
+ <dynamicField name="*_ls" type="plong" indexed="true" stored="true" multiValued="true"/>
207
+ <dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
208
+ <dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
209
+ <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true"/>
210
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
211
+ <dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
212
+ <dynamicField name="*_f" type="pfloat" indexed="true" stored="true"/>
213
+ <dynamicField name="*_fs" type="pfloat" indexed="true" stored="true" multiValued="true"/>
214
+ <dynamicField name="*_d" type="pdouble" indexed="true" stored="true"/>
215
+ <dynamicField name="*_ds" type="pdouble" indexed="true" stored="true" multiValued="true"/>
216
+
217
+ <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/>
218
+ <dynamicField name="*_dts" type="pdate" indexed="true" stored="true" multiValued="true"/>
219
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
220
+
221
+ <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
222
+
223
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
224
+ <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
225
+
226
+ <dynamicField name="random_*" type="random" />
227
+
228
+ <!-- uncomment the following to ignore any fields that don't already match an existing
229
+ field name or dynamic field, rather than reporting them as an error.
230
+ alternately, change the type="ignored" to some other type e.g. "text" if you want
231
+ unknown fields indexed and/or stored by default -->
232
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
233
+
234
+
235
+ <!-- Field to use to determine and enforce document uniqueness.
236
+ Unless this field is marked with required="false", it will be a required field
237
+ -->
238
+ <uniqueKey>id</uniqueKey>
239
+
240
+ <!-- copyField commands copy one field to another at the time a document
241
+ is added to the index. It's used either to index the same field differently,
242
+ or to add multiple fields to the same field for easier/faster searching. -->
243
+
244
+ <copyField source="cat" dest="text"/>
245
+ <copyField source="name" dest="text"/>
246
+ <copyField source="manu" dest="text"/>
247
+ <copyField source="features" dest="text"/>
248
+ <copyField source="includes" dest="text"/>
249
+ <copyField source="manu" dest="manu_exact"/>
250
+
251
+ <!-- Copy the price into a currency enabled field (default USD) -->
252
+ <copyField source="price" dest="price_c"/>
253
+
254
+ <!-- Text fields from SolrCell to search by default in our catch-all field -->
255
+ <copyField source="title" dest="text"/>
256
+ <copyField source="author" dest="text"/>
257
+ <copyField source="description" dest="text"/>
258
+ <copyField source="keywords" dest="text"/>
259
+ <copyField source="content" dest="text"/>
260
+ <copyField source="content_type" dest="text"/>
261
+ <copyField source="resourcename" dest="text"/>
262
+ <copyField source="url" dest="text"/>
263
+
264
+ <!-- Create a string version of author for faceting -->
265
+ <copyField source="author" dest="author_s"/>
266
+
267
+ <!-- Above, multiple source fields are copied to the [text] field.
268
+ Another way to map multiple source fields to the same
269
+ destination field is to use the dynamic field syntax.
270
+ copyField also supports a maxChars to copy setting. -->
271
+
272
+ <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
273
+
274
+ <!-- copy name to alphaNameSort, a field designed for sorting by name -->
275
+ <!-- <copyField source="name" dest="alphaNameSort"/> -->
276
+
277
+
278
+ <!-- field type definitions. The "name" attribute is
279
+ just a label to be used by field definitions. The "class"
280
+ attribute and any other attributes determine the real
281
+ behavior of the fieldType.
282
+ Class names starting with "solr" refer to java classes in a
283
+ standard package such as org.apache.solr.analysis
284
+ -->
285
+
286
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
287
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
288
+
289
+ <!-- boolean type: "true" or "false" -->
290
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
291
+
292
+ <!-- sortMissingLast and sortMissingFirst are optional attributes that are
293
+ currently supported on types that are sorted internally as strings
294
+ and on numeric types.
295
+ This includes "string", "boolean", "pint", "pfloat", "plong", "pdate", "pdouble".
296
+ - If sortMissingLast="true", then a sort on this field will cause documents
297
+ without the field to come after documents with the field,
298
+ regardless of the requested sort order (asc or desc).
299
+ - If sortMissingFirst="true", then a sort on this field will cause documents
300
+ without the field to come before documents with the field,
301
+ regardless of the requested sort order.
302
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
303
+ then default lucene sorting will be used which places docs without the
304
+ field first in an ascending sort and last in a descending sort.
305
+ -->
306
+
307
+ <!--
308
+ Numeric field types that index values using KD-trees.
309
+ Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
310
+ -->
311
+ <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
312
+ <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
313
+ <fieldType name="plong" class="solr.LongPointField" docValues="true"/>
314
+ <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
315
+
316
+ <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
317
+ <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
318
+ <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
319
+ <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
320
+
321
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
322
+ is a more restricted form of the canonical representation of dateTime
323
+ http://www.w3.org/TR/xmlschema-2/#dateTime
324
+ The trailing "Z" designates UTC time and is mandatory.
325
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
326
+ All other components are mandatory.
327
+
328
+ Expressions can also be used to denote calculations that should be
329
+ performed relative to "NOW" to determine the value, ie...
330
+
331
+ NOW/HOUR
332
+ ... Round to the start of the current hour
333
+ NOW-1DAY
334
+ ... Exactly 1 day prior to now
335
+ NOW/DAY+6MONTHS+3DAYS
336
+ ... 6 months and 3 days in the future from the start of
337
+ the current day
338
+
339
+ Consult the DatePointField javadocs for more information.
340
+ -->
341
+
342
+ <!-- KD-tree versions of date fields -->
343
+ <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
344
+ <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
345
+
346
+ <!--Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
347
+ <fieldType name="binary" class="solr.BinaryField"/>
348
+
349
+ <!-- The "RandomSortField" is not used to store or search any
350
+ data. You can declare fields of this type in your schema
351
+ to generate pseudo-random orderings of your docs for sorting
352
+ or function purposes. The ordering is generated based on the field
353
+ name and the version of the index. As long as the index version
354
+ remains unchanged, and the same field name is reused,
355
+ the ordering of the docs will be consistent.
356
+ If you want different pseudo-random orderings of documents,
357
+ for the same version of the index, use a dynamicField and
358
+ change the field name in the request.
359
+ -->
360
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
361
+
362
+ <!-- solr.TextField allows the specification of custom text analyzers
363
+ specified as a tokenizer and a list of token filters. Different
364
+ analyzers may be specified for indexing and querying.
365
+
366
+ The optional positionIncrementGap puts space between multiple fields of
367
+ this type on the same document, with the purpose of preventing false phrase
368
+ matching across fields.
369
+
370
+ For more info on customizing your analyzer chain, please see
371
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
372
+ -->
373
+
374
+ <!-- One can also specify an existing Analyzer class that has a
375
+ default constructor via the class attribute on the analyzer element.
376
+ Example:
377
+ <fieldType name="text_greek" class="solr.TextField">
378
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
379
+ </fieldType>
380
+ -->
381
+
382
+ <!-- A text field that only splits on whitespace for exact matching of words -->
383
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
384
+ <analyzer>
385
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
386
+ </analyzer>
387
+ </fieldType>
388
+
389
+ <!-- A text type for English text where stopwords and synonyms are managed using the REST API -->
390
+ <fieldType name="managed_en" class="solr.TextField" positionIncrementGap="100">
391
+ <analyzer type="index">
392
+ <tokenizer class="solr.StandardTokenizerFactory"/>
393
+ <filter class="solr.ManagedStopFilterFactory" managed="english" />
394
+ <filter class="solr.ManagedSynonymGraphFilterFactory" managed="english" />
395
+ <filter class="solr.FlattenGraphFilterFactory"/>
396
+ </analyzer>
397
+ <analyzer type="query">
398
+ <tokenizer class="solr.StandardTokenizerFactory"/>
399
+ <filter class="solr.ManagedStopFilterFactory" managed="english" />
400
+ <filter class="solr.ManagedSynonymGraphFilterFactory" managed="english" />
401
+ </analyzer>
402
+ </fieldType>
403
+
404
+ <!-- A general text field that has reasonable, generic
405
+ cross-language defaults: it tokenizes with StandardTokenizer,
406
+ removes stop words from case-insensitive "stopwords.txt"
407
+ (empty by default), and down cases. At query time only, it
408
+ also applies synonyms. -->
409
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
410
+ <analyzer type="index">
411
+ <tokenizer class="solr.StandardTokenizerFactory"/>
412
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
413
+ <!-- in this example, we will only use synonyms at query time
414
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
415
+ <filter class="solr.FlattenGraphFilterFactory"/>
416
+ -->
417
+ <filter class="solr.LowerCaseFilterFactory"/>
418
+ </analyzer>
419
+ <analyzer type="query">
420
+ <tokenizer class="solr.StandardTokenizerFactory"/>
421
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
422
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
423
+ <filter class="solr.LowerCaseFilterFactory"/>
424
+ </analyzer>
425
+ </fieldType>
426
+
427
+ <!-- SortableTextField generally functions exactly like TextField,
428
+ except that it supports, and by default uses, docValues for sorting (or faceting)
429
+ on the first 1024 characters of the original field values (which is configurable).
430
+
431
+ This makes it a bit more useful than TextField in many situations, but the trade-off
432
+ is that it takes up more space on disk; which is why it's not used in place of TextField
433
+ for every fieldType in this _default schema.
434
+ -->
435
+ <fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
436
+ <analyzer type="index">
437
+ <tokenizer class="solr.StandardTokenizerFactory"/>
438
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
439
+ <filter class="solr.LowerCaseFilterFactory"/>
440
+ </analyzer>
441
+ <analyzer type="query">
442
+ <tokenizer class="solr.StandardTokenizerFactory"/>
443
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
444
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
445
+ <filter class="solr.LowerCaseFilterFactory"/>
446
+ </analyzer>
447
+ </fieldType>
448
+
449
+ <!-- A text field with defaults appropriate for English: it
450
+ tokenizes with StandardTokenizer, removes English stop words
451
+ (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
452
+ finally applies Porter's stemming. The query time analyzer
453
+ also applies synonyms from synonyms.txt. -->
454
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
455
+ <analyzer type="index">
456
+ <tokenizer class="solr.StandardTokenizerFactory"/>
457
+ <!-- in this example, we will only use synonyms at query time
458
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
459
+ <filter class="solr.FlattenGraphFilterFactory"/>
460
+ -->
461
+ <!-- Case insensitive stop word removal.
462
+ -->
463
+ <filter class="solr.StopFilterFactory"
464
+ ignoreCase="true"
465
+ words="lang/stopwords_en.txt"
466
+ />
467
+ <filter class="solr.LowerCaseFilterFactory"/>
468
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
469
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
470
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
471
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
472
+ -->
473
+ <filter class="solr.PorterStemFilterFactory"/>
474
+ </analyzer>
475
+ <analyzer type="query">
476
+ <tokenizer class="solr.StandardTokenizerFactory"/>
477
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
478
+ <filter class="solr.StopFilterFactory"
479
+ ignoreCase="true"
480
+ words="lang/stopwords_en.txt"
481
+ />
482
+ <filter class="solr.LowerCaseFilterFactory"/>
483
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
484
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
485
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
486
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
487
+ -->
488
+ <filter class="solr.PorterStemFilterFactory"/>
489
+ </analyzer>
490
+ </fieldType>
491
+
492
+ <!-- A text field with defaults appropriate for English, plus
493
+ aggressive word-splitting and autophrase features enabled.
494
+ This field is just like text_en, except it adds
495
+ WordDelimiterGraphFilter to enable splitting and matching of
496
+ words on case-change, alpha numeric boundaries, and
497
+ non-alphanumeric chars. This means certain compound word
498
+ cases will work, for example query "wi fi" will match
499
+ document "WiFi" or "wi-fi".
500
+ -->
501
+ <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
502
+ <analyzer type="index">
503
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
504
+ <!-- in this example, we will only use synonyms at query time
505
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
506
+ -->
507
+ <!-- Case insensitive stop word removal.
508
+ -->
509
+ <filter class="solr.StopFilterFactory"
510
+ ignoreCase="true"
511
+ words="lang/stopwords_en.txt"
512
+ />
513
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
514
+ <filter class="solr.LowerCaseFilterFactory"/>
515
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
516
+ <filter class="solr.PorterStemFilterFactory"/>
517
+ <filter class="solr.FlattenGraphFilterFactory" />
518
+ </analyzer>
519
+ <analyzer type="query">
520
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
521
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
522
+ <filter class="solr.StopFilterFactory"
523
+ ignoreCase="true"
524
+ words="lang/stopwords_en.txt"
525
+ />
526
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
527
+ <filter class="solr.LowerCaseFilterFactory"/>
528
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
529
+ <filter class="solr.PorterStemFilterFactory"/>
530
+ </analyzer>
531
+ </fieldType>
532
+
533
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
534
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
535
+ <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
536
+ <analyzer type="index">
537
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
538
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
539
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
540
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
541
+ <filter class="solr.LowerCaseFilterFactory"/>
542
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
543
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
544
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
545
+ possible with WordDelimiterGraphFilter in conjunction with stemming. -->
546
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
547
+ <filter class="solr.FlattenGraphFilterFactory" />
548
+ </analyzer>
549
+ <analyzer type="query">
550
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
551
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
552
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
553
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
554
+ <filter class="solr.LowerCaseFilterFactory"/>
555
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
556
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
557
+ <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
558
+ possible with WordDelimiterGraphFilter in conjunction with stemming. -->
559
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
560
+ </analyzer>
561
+ </fieldType>
562
+
563
+ <!-- Just like text_general except it reverses the characters of
564
+ each token, to enable more efficient leading wildcard queries. -->
565
+ <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
566
+ <analyzer type="index">
567
+ <tokenizer class="solr.StandardTokenizerFactory"/>
568
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
569
+ <filter class="solr.LowerCaseFilterFactory"/>
570
+ <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
571
+ maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
572
+ </analyzer>
573
+ <analyzer type="query">
574
+ <tokenizer class="solr.StandardTokenizerFactory"/>
575
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
576
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
577
+ <filter class="solr.LowerCaseFilterFactory"/>
578
+ </analyzer>
579
+ </fieldType>
580
+
581
+ <!-- charFilter + WhitespaceTokenizer -->
582
+ <!--
583
+ <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
584
+ <analyzer>
585
+ <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
586
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
587
+ </analyzer>
588
+ </fieldType>
589
+ -->
590
+
591
+ <!-- This is an example of using the KeywordTokenizer along
592
+ with various TokenFilterFactories to produce a sortable field
593
+ that does not include some properties of the source text
594
+ -->
595
+ <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
596
+ <analyzer>
597
+ <!-- KeywordTokenizer does no actual tokenizing, so the entire
598
+ input string is preserved as a single token
599
+ -->
600
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
601
+ <!-- The LowerCase TokenFilter does what you expect, which can be
602
+ useful when you want your sorting to be case insensitive
603
+ -->
604
+ <filter class="solr.LowerCaseFilterFactory" />
605
+ <!-- The TrimFilter removes any leading or trailing whitespace -->
606
+ <filter class="solr.TrimFilterFactory" />
607
+ <!-- The PatternReplaceFilter gives you the flexibility to use
608
+ Java Regular expression to replace any sequence of characters
609
+ matching a pattern with an arbitrary replacement string,
610
+ which may include back references to portions of the original
611
+ string matched by the pattern.
612
+
613
+ See the Java Regular Expression documentation for more
614
+ information on pattern and replacement string syntax.
615
+
616
+ http://docs.oracle.com/javase/8/docs/api/java/util/regex/package-summary.html
617
+ -->
618
+ <filter class="solr.PatternReplaceFilterFactory"
619
+ pattern="([^a-z])" replacement="" replace="all"
620
+ />
621
+ </analyzer>
622
+ </fieldType>
623
+
624
+ <fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
625
+ <analyzer>
626
+ <tokenizer class="solr.StandardTokenizerFactory"/>
627
+ <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
628
+ </analyzer>
629
+ </fieldType>
630
+
631
+ <fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
632
+ <analyzer>
633
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
634
+ <!--
635
+ The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
636
+ a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
637
+ Attributes of the DelimitedPayloadTokenFilterFactory :
638
+ "delimiter" - a one character delimiter. Default is | (pipe)
639
+ "encoder" - how to encode the following value into a payload
640
+ float -> org.apache.lucene.analysis.payloads.FloatEncoder,
641
+ integer -> o.a.l.a.p.IntegerEncoder
642
+ identity -> o.a.l.a.p.IdentityEncoder
643
+ Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
644
+ -->
645
+ <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
646
+ </analyzer>
647
+ </fieldType>
648
+
649
+ <!-- lowercases the entire field value, keeping it as a single token. -->
650
+ <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
651
+ <analyzer>
652
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
653
+ <filter class="solr.LowerCaseFilterFactory" />
654
+ </analyzer>
655
+ </fieldType>
656
+
657
+ <!--
658
+ Example of using PathHierarchyTokenizerFactory at index time, so
659
+ queries for paths match documents at that path, or in descendent paths
660
+ -->
661
+ <fieldType name="descendent_path" class="solr.TextField">
662
+ <analyzer type="index">
663
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
664
+ </analyzer>
665
+ <analyzer type="query">
666
+ <tokenizer class="solr.KeywordTokenizerFactory" />
667
+ </analyzer>
668
+ </fieldType>
669
+ <!--
670
+ Example of using PathHierarchyTokenizerFactory at query time, so
671
+ queries for paths match documents at that path, or in ancestor paths
672
+ -->
673
+ <fieldType name="ancestor_path" class="solr.TextField">
674
+ <analyzer type="index">
675
+ <tokenizer class="solr.KeywordTokenizerFactory" />
676
+ </analyzer>
677
+ <analyzer type="query">
678
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
679
+ </analyzer>
680
+ </fieldType>
681
+
682
+ <!-- since fields of this type are by default not stored or indexed,
683
+ any data added to them will be ignored outright. -->
684
+ <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
685
+
686
+ <!-- This point type indexes the coordinates as separate fields (subFields)
687
+ If subFieldType is defined, it references a type, and a dynamic field
688
+ definition is created matching *___<typename>. Alternately, if
689
+ subFieldSuffix is defined, that is used to create the subFields.
690
+ Example: if subFieldType="double", then the coordinates would be
691
+ indexed in fields myloc_0___double,myloc_1___double.
692
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
693
+ in fields myloc_0_d,myloc_1_d
694
+ The subFields are an implementation detail of the fieldType, and end
695
+ users normally should not need to know about them.
696
+ -->
697
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
698
+
699
+ <!-- A specialized field for geospatial search filters and distance sorting. -->
700
+ <fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/>
701
+
702
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
703
+ For more information about this and other Spatial fields new to Solr 4, see:
704
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
705
+ -->
706
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
707
+ geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
708
+
709
+ <!-- Spatial rectangle (bounding box) field. It supports most spatial predicates, and has
710
+ special relevancy modes: score=overlapRatio|area|area2D (local-param to the query). DocValues is recommended for
711
+ relevancy. -->
712
+ <fieldType name="bbox" class="solr.BBoxField"
713
+ geo="true" distanceUnits="kilometers" numberType="pdouble" />
714
+
715
+ <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
716
+ Parameters:
717
+ amountLongSuffix: Required. Refers to a dynamic field for the raw amount sub-field.
718
+ The dynamic field must have a field type that extends LongValueFieldType.
719
+ Note: If you expect to use Atomic Updates, this dynamic field may not be stored.
720
+ codeStrSuffix: Required. Refers to a dynamic field for the currency code sub-field.
721
+ The dynamic field must have a field type that extends StrField.
722
+ Note: If you expect to use Atomic Updates, this dynamic field may not be stored.
723
+ defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
724
+ providerClass: Lets you plug in other exchange provider backend:
725
+ solr.FileExchangeRateProvider is the default and takes one parameter:
726
+ currencyConfig: name of an xml file holding exchange rates
727
+ solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
728
+ ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
729
+ refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
730
+ -->
731
+ <fieldType name="currency" class="solr.CurrencyFieldType" amountLongSuffix="_l_ns" codeStrSuffix="_s_ns"
732
+ defaultCurrency="USD" currencyConfig="currency.xml" />
733
+
734
+
735
+ <!-- some examples for different languages (generally ordered by ISO code) -->
736
+
737
+ <!-- Arabic -->
738
+ <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
739
+ <analyzer>
740
+ <tokenizer class="solr.StandardTokenizerFactory"/>
741
+ <!-- for any non-arabic -->
742
+ <filter class="solr.LowerCaseFilterFactory"/>
743
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
744
+ <!-- normalizes ﻯ to ﻱ, etc -->
745
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
746
+ <filter class="solr.ArabicStemFilterFactory"/>
747
+ </analyzer>
748
+ </fieldType>
749
+
750
+ <!-- Bulgarian -->
751
+ <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
752
+ <analyzer>
753
+ <tokenizer class="solr.StandardTokenizerFactory"/>
754
+ <filter class="solr.LowerCaseFilterFactory"/>
755
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />
756
+ <filter class="solr.BulgarianStemFilterFactory"/>
757
+ </analyzer>
758
+ </fieldType>
759
+
760
+ <!-- Catalan -->
761
+ <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
762
+ <analyzer>
763
+ <tokenizer class="solr.StandardTokenizerFactory"/>
764
+ <!-- removes l', etc -->
765
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
766
+ <filter class="solr.LowerCaseFilterFactory"/>
767
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
768
+ <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
769
+ </analyzer>
770
+ </fieldType>
771
+
772
+ <!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
773
+ <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
774
+ <analyzer>
775
+ <tokenizer class="solr.StandardTokenizerFactory"/>
776
+ <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
777
+ <filter class="solr.CJKWidthFilterFactory"/>
778
+ <!-- for any non-CJK -->
779
+ <filter class="solr.LowerCaseFilterFactory"/>
780
+ <filter class="solr.CJKBigramFilterFactory"/>
781
+ </analyzer>
782
+ </fieldType>
783
+
784
+ <!-- Kurdish -->
785
+ <fieldType name="text_ckb" class="solr.TextField" positionIncrementGap="100">
786
+ <analyzer>
787
+ <tokenizer class="solr.StandardTokenizerFactory"/>
788
+ <filter class="solr.SoraniNormalizationFilterFactory"/>
789
+ <!-- for any latin text -->
790
+ <filter class="solr.LowerCaseFilterFactory"/>
791
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
792
+ <filter class="solr.SoraniStemFilterFactory"/>
793
+ </analyzer>
794
+ </fieldType>
795
+
796
+ <!-- Czech -->
797
+ <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
798
+ <analyzer>
799
+ <tokenizer class="solr.StandardTokenizerFactory"/>
800
+ <filter class="solr.LowerCaseFilterFactory"/>
801
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
802
+ <filter class="solr.CzechStemFilterFactory"/>
803
+ </analyzer>
804
+ </fieldType>
805
+
806
+ <!-- Danish -->
807
+ <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
808
+ <analyzer>
809
+ <tokenizer class="solr.StandardTokenizerFactory"/>
810
+ <filter class="solr.LowerCaseFilterFactory"/>
811
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
812
+ <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
813
+ </analyzer>
814
+ </fieldType>
815
+
816
+ <!-- German -->
817
+ <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
818
+ <analyzer>
819
+ <tokenizer class="solr.StandardTokenizerFactory"/>
820
+ <filter class="solr.LowerCaseFilterFactory"/>
821
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
822
+ <filter class="solr.GermanNormalizationFilterFactory"/>
823
+ <filter class="solr.GermanLightStemFilterFactory"/>
824
+ <!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
825
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
826
+ </analyzer>
827
+ </fieldType>
828
+
829
+ <!-- Greek -->
830
+ <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
831
+ <analyzer>
832
+ <tokenizer class="solr.StandardTokenizerFactory"/>
833
+ <!-- greek specific lowercase for sigma -->
834
+ <filter class="solr.GreekLowerCaseFilterFactory"/>
835
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
836
+ <filter class="solr.GreekStemFilterFactory"/>
837
+ </analyzer>
838
+ </fieldType>
839
+
840
+ <!-- Spanish -->
841
+ <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
842
+ <analyzer>
843
+ <tokenizer class="solr.StandardTokenizerFactory"/>
844
+ <filter class="solr.LowerCaseFilterFactory"/>
845
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
846
+ <filter class="solr.SpanishLightStemFilterFactory"/>
847
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
848
+ </analyzer>
849
+ </fieldType>
850
+
851
+ <!-- Basque -->
852
+ <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
853
+ <analyzer>
854
+ <tokenizer class="solr.StandardTokenizerFactory"/>
855
+ <filter class="solr.LowerCaseFilterFactory"/>
856
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
857
+ <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
858
+ </analyzer>
859
+ </fieldType>
860
+
861
+ <!-- Persian -->
862
+ <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
863
+ <analyzer>
864
+ <!-- for ZWNJ -->
865
+ <charFilter class="solr.PersianCharFilterFactory"/>
866
+ <tokenizer class="solr.StandardTokenizerFactory"/>
867
+ <filter class="solr.LowerCaseFilterFactory"/>
868
+ <filter class="solr.ArabicNormalizationFilterFactory"/>
869
+ <filter class="solr.PersianNormalizationFilterFactory"/>
870
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
871
+ </analyzer>
872
+ </fieldType>
873
+
874
+ <!-- Finnish -->
875
+ <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
876
+ <analyzer>
877
+ <tokenizer class="solr.StandardTokenizerFactory"/>
878
+ <filter class="solr.LowerCaseFilterFactory"/>
879
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
880
+ <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
881
+ <!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
882
+ </analyzer>
883
+ </fieldType>
884
+
885
+ <!-- French -->
886
+ <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
887
+ <analyzer>
888
+ <tokenizer class="solr.StandardTokenizerFactory"/>
889
+ <!-- removes l', etc -->
890
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
891
+ <filter class="solr.LowerCaseFilterFactory"/>
892
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
893
+ <filter class="solr.FrenchLightStemFilterFactory"/>
894
+ <!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
895
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
896
+ </analyzer>
897
+ </fieldType>
898
+
899
+ <!-- Irish -->
900
+ <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
901
+ <analyzer>
902
+ <tokenizer class="solr.StandardTokenizerFactory"/>
903
+ <!-- removes d', etc -->
904
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
905
+ <!-- removes n-, etc. position increments is intentionally false! -->
906
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
907
+ <filter class="solr.IrishLowerCaseFilterFactory"/>
908
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
909
+ <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
910
+ </analyzer>
911
+ </fieldType>
912
+
913
+ <!-- Galician -->
914
+ <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
915
+ <analyzer>
916
+ <tokenizer class="solr.StandardTokenizerFactory"/>
917
+ <filter class="solr.LowerCaseFilterFactory"/>
918
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
919
+ <filter class="solr.GalicianStemFilterFactory"/>
920
+ <!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
921
+ </analyzer>
922
+ </fieldType>
923
+
924
+ <!-- Hindi -->
925
+ <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
926
+ <analyzer>
927
+ <tokenizer class="solr.StandardTokenizerFactory"/>
928
+ <filter class="solr.LowerCaseFilterFactory"/>
929
+ <!-- normalizes unicode representation -->
930
+ <filter class="solr.IndicNormalizationFilterFactory"/>
931
+ <!-- normalizes variation in spelling -->
932
+ <filter class="solr.HindiNormalizationFilterFactory"/>
933
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
934
+ <filter class="solr.HindiStemFilterFactory"/>
935
+ </analyzer>
936
+ </fieldType>
937
+
938
+ <!-- Hungarian -->
939
+ <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
940
+ <analyzer>
941
+ <tokenizer class="solr.StandardTokenizerFactory"/>
942
+ <filter class="solr.LowerCaseFilterFactory"/>
943
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
944
+ <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
945
+ <!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
946
+ </analyzer>
947
+ </fieldType>
948
+
949
+ <!-- Armenian -->
950
+ <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
951
+ <analyzer>
952
+ <tokenizer class="solr.StandardTokenizerFactory"/>
953
+ <filter class="solr.LowerCaseFilterFactory"/>
954
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
955
+ <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
956
+ </analyzer>
957
+ </fieldType>
958
+
959
+ <!-- Indonesian -->
960
+ <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
961
+ <analyzer>
962
+ <tokenizer class="solr.StandardTokenizerFactory"/>
963
+ <filter class="solr.LowerCaseFilterFactory"/>
964
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
965
+ <!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
966
+ <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
967
+ </analyzer>
968
+ </fieldType>
969
+
970
+ <!-- Italian -->
971
+ <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
972
+ <analyzer>
973
+ <tokenizer class="solr.StandardTokenizerFactory"/>
974
+ <!-- removes l', etc -->
975
+ <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
976
+ <filter class="solr.LowerCaseFilterFactory"/>
977
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
978
+ <filter class="solr.ItalianLightStemFilterFactory"/>
979
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
980
+ </analyzer>
981
+ </fieldType>
982
+
983
+ <!-- Japanese using morphological analysis (see text_cjk for a configuration using bigramming)
984
+
985
+ NOTE: If you want to optimize search for precision, use default operator AND in your request
986
+ handler config (q.op). Use OR if you would like to optimize for recall (default).
987
+ -->
988
+ <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
989
+ <analyzer>
990
+ <!-- Kuromoji Japanese morphological analyzer/tokenizer (JapaneseTokenizer)
991
+
992
+ Kuromoji has a search mode (default) that does segmentation useful for search. A heuristic
993
+ is used to segment compounds into their parts and the compound itself is kept as a synonym.
994
+
995
+ Valid values for attribute mode are:
996
+ normal: regular segmentation
997
+ search: segmentation useful for search with synonyms compounds (default)
998
+ extended: same as search mode, but unigrams unknown words (experimental)
999
+
1000
+ For some applications it might be good to use search mode for indexing and normal mode for
1001
+ queries to reduce recall and prevent parts of compounds from being matched and highlighted.
1002
+ Use <analyzer type="index"> and <analyzer type="query"> for this and mode normal in query.
1003
+
1004
+ Kuromoji also has a convenient user dictionary feature that allows overriding the statistical
1005
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
1006
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
1007
+
1008
+ User dictionary attributes are:
1009
+ userDictionary: user dictionary filename
1010
+ userDictionaryEncoding: user dictionary encoding (default is UTF-8)
1011
+
1012
+ See lang/userdict_ja.txt for a sample user dictionary file.
1013
+
1014
+ Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
1015
+
1016
+ See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
1017
+ -->
1018
+ <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
1019
+ <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
1020
+ <!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
1021
+ <filter class="solr.JapaneseBaseFormFilterFactory"/>
1022
+ <!-- Removes tokens with certain part-of-speech tags -->
1023
+ <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
1024
+ <!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
1025
+ <filter class="solr.CJKWidthFilterFactory"/>
1026
+ <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
1027
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
1028
+ <!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
1029
+ <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
1030
+ <!-- Lower-cases romaji characters -->
1031
+ <filter class="solr.LowerCaseFilterFactory"/>
1032
+ </analyzer>
1033
+ </fieldType>
1034
+
1035
+ <!-- Korean morphological analysis -->
1036
+ <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/>
1037
+ <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
1038
+ <analyzer>
1039
+ <!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer)
1040
+ The Korean (nori) analyzer integrates Lucene nori analysis module into Solr.
1041
+ It uses the mecab-ko-dic dictionary to perform morphological analysis of Korean texts.
1042
+
1043
+ This dictionary was built with MeCab, it defines a format for the features adapted
1044
+ for the Korean language.
1045
+
1046
+ Nori also has a convenient user dictionary feature that allows overriding the statistical
1047
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
1048
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
1049
+
1050
+ The tokenizer supports multiple schema attributes:
1051
+ * userDictionary: User dictionary path.
1052
+ * userDictionaryEncoding: User dictionary encoding.
1053
+ * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
1054
+ * outputUnknownUnigrams: If true outputs unigrams for unknown words.
1055
+ -->
1056
+ <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
1057
+ <!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags',
1058
+ listing the tags to remove. By default it removes:
1059
+ E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
1060
+ This is basically an equivalent to stemming.
1061
+ -->
1062
+ <filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
1063
+ <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
1064
+ <filter class="solr.KoreanReadingFormFilterFactory" />
1065
+ <filter class="solr.LowerCaseFilterFactory" />
1066
+ </analyzer>
1067
+ </fieldType>
1068
+
1069
+ <!-- Latvian -->
1070
+ <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
1071
+ <analyzer>
1072
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1073
+ <filter class="solr.LowerCaseFilterFactory"/>
1074
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
1075
+ <filter class="solr.LatvianStemFilterFactory"/>
1076
+ </analyzer>
1077
+ </fieldType>
1078
+
1079
+ <!-- Dutch -->
1080
+ <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
1081
+ <analyzer>
1082
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1083
+ <filter class="solr.LowerCaseFilterFactory"/>
1084
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
1085
+ <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
1086
+ <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
1087
+ </analyzer>
1088
+ </fieldType>
1089
+
1090
+ <!-- Norwegian -->
1091
+ <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
1092
+ <analyzer>
1093
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1094
+ <filter class="solr.LowerCaseFilterFactory"/>
1095
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
1096
+ <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
1097
+ <!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> -->
1098
+ <!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> -->
1099
+ <!-- The "light" and "minimal" stemmers support variants: nb=Bokmål, nn=Nynorsk, no=Both -->
1100
+ </analyzer>
1101
+ </fieldType>
1102
+
1103
+ <!-- Portuguese -->
1104
+ <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
1105
+ <analyzer>
1106
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1107
+ <filter class="solr.LowerCaseFilterFactory"/>
1108
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
1109
+ <filter class="solr.PortugueseLightStemFilterFactory"/>
1110
+ <!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
1111
+ <!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
1112
+ <!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
1113
+ </analyzer>
1114
+ </fieldType>
1115
+
1116
+ <!-- Romanian -->
1117
+ <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
1118
+ <analyzer>
1119
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1120
+ <filter class="solr.LowerCaseFilterFactory"/>
1121
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
1122
+ <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
1123
+ </analyzer>
1124
+ </fieldType>
1125
+
1126
+ <!-- Russian -->
1127
+ <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
1128
+ <analyzer>
1129
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1130
+ <filter class="solr.LowerCaseFilterFactory"/>
1131
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
1132
+ <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
1133
+ <!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
1134
+ </analyzer>
1135
+ </fieldType>
1136
+
1137
+ <!-- Swedish -->
1138
+ <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
1139
+ <analyzer>
1140
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1141
+ <filter class="solr.LowerCaseFilterFactory"/>
1142
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
1143
+ <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
1144
+ <!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
1145
+ </analyzer>
1146
+ </fieldType>
1147
+
1148
+ <!-- Thai -->
1149
+ <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
1150
+ <analyzer>
1151
+ <tokenizer class="solr.ThaiTokenizerFactory"/>
1152
+ <filter class="solr.LowerCaseFilterFactory"/>
1153
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
1154
+ </analyzer>
1155
+ </fieldType>
1156
+
1157
+ <!-- Turkish -->
1158
+ <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
1159
+ <analyzer>
1160
+ <tokenizer class="solr.StandardTokenizerFactory"/>
1161
+ <filter class="solr.ApostropheFilterFactory"/>
1162
+ <filter class="solr.TurkishLowerCaseFilterFactory"/>
1163
+ <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
1164
+ <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
1165
+ </analyzer>
1166
+ </fieldType>
1167
+
1168
+ <!-- Pre-analyzed field type, allows inserting arbitrary token streams and stored values. -->
1169
+ <fieldType name="preanalyzed" class="solr.PreAnalyzedField">
1170
+ <!-- PreAnalyzedField's builtin index analyzer just decodes the pre-analyzed token stream. -->
1171
+ <analyzer type="query">
1172
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
1173
+ </analyzer>
1174
+ </fieldType>
1175
+
1176
+ <!-- Similarity is the scoring routine for each document vs. a query.
1177
+ A custom Similarity or SimilarityFactory may be specified here, but
1178
+ the default is fine for most applications.
1179
+ For more info: http://wiki.apache.org/solr/SchemaXml#Similarity
1180
+ -->
1181
+ <!--
1182
+ <similarity class="com.example.solr.CustomSimilarityFactory">
1183
+ <str name="paramkey">param value</str>
1184
+ </similarity>
1185
+ -->
1186
+
1187
+ </schema>