simple_solr_client 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (136) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +349 -0
  5. data/Rakefile +11 -0
  6. data/lib/simple_solr.rb +42 -0
  7. data/lib/simple_solr/client.rb +139 -0
  8. data/lib/simple_solr/client/core_admin.rb +0 -0
  9. data/lib/simple_solr/core.rb +50 -0
  10. data/lib/simple_solr/core/admin.rb +47 -0
  11. data/lib/simple_solr/core/core_data.rb +51 -0
  12. data/lib/simple_solr/core/index.rb +25 -0
  13. data/lib/simple_solr/core/search.rb +21 -0
  14. data/lib/simple_solr/response/document.rb +45 -0
  15. data/lib/simple_solr/response/generic_response.rb +19 -0
  16. data/lib/simple_solr/response/query_response.rb +54 -0
  17. data/lib/simple_solr/schema.rb +261 -0
  18. data/lib/simple_solr/schema/analysis.rb +58 -0
  19. data/lib/simple_solr/schema/copyfield.rb +42 -0
  20. data/lib/simple_solr/schema/dynamic_field.rb +23 -0
  21. data/lib/simple_solr/schema/field.rb +35 -0
  22. data/lib/simple_solr/schema/field_or_type.rb +112 -0
  23. data/lib/simple_solr/schema/field_type.rb +62 -0
  24. data/lib/simple_solr/schema/matcher.rb +16 -0
  25. data/lib/simple_solr/version.rb +3 -0
  26. data/simple_solr_client.gemspec +39 -0
  27. data/solr_sample_core/conf/_schema_analysis_stopwords_english.json +38 -0
  28. data/solr_sample_core/conf/_schema_analysis_synonyms_english.json +11 -0
  29. data/solr_sample_core/conf/admin-extra.html +24 -0
  30. data/solr_sample_core/conf/admin-extra.menu-bottom.html +25 -0
  31. data/solr_sample_core/conf/admin-extra.menu-top.html +25 -0
  32. data/solr_sample_core/conf/clustering/carrot2/kmeans-attributes.xml +19 -0
  33. data/solr_sample_core/conf/clustering/carrot2/lingo-attributes.xml +24 -0
  34. data/solr_sample_core/conf/clustering/carrot2/stc-attributes.xml +19 -0
  35. data/solr_sample_core/conf/currency.xml +67 -0
  36. data/solr_sample_core/conf/elevate.xml +38 -0
  37. data/solr_sample_core/conf/lang/contractions_ca.txt +8 -0
  38. data/solr_sample_core/conf/lang/contractions_fr.txt +15 -0
  39. data/solr_sample_core/conf/lang/contractions_ga.txt +5 -0
  40. data/solr_sample_core/conf/lang/contractions_it.txt +23 -0
  41. data/solr_sample_core/conf/lang/hyphenations_ga.txt +5 -0
  42. data/solr_sample_core/conf/lang/stemdict_nl.txt +6 -0
  43. data/solr_sample_core/conf/lang/stoptags_ja.txt +420 -0
  44. data/solr_sample_core/conf/lang/stopwords_ar.txt +125 -0
  45. data/solr_sample_core/conf/lang/stopwords_bg.txt +193 -0
  46. data/solr_sample_core/conf/lang/stopwords_ca.txt +220 -0
  47. data/solr_sample_core/conf/lang/stopwords_ckb.txt +136 -0
  48. data/solr_sample_core/conf/lang/stopwords_cz.txt +172 -0
  49. data/solr_sample_core/conf/lang/stopwords_da.txt +110 -0
  50. data/solr_sample_core/conf/lang/stopwords_de.txt +294 -0
  51. data/solr_sample_core/conf/lang/stopwords_el.txt +78 -0
  52. data/solr_sample_core/conf/lang/stopwords_en.txt +54 -0
  53. data/solr_sample_core/conf/lang/stopwords_es.txt +356 -0
  54. data/solr_sample_core/conf/lang/stopwords_eu.txt +99 -0
  55. data/solr_sample_core/conf/lang/stopwords_fa.txt +313 -0
  56. data/solr_sample_core/conf/lang/stopwords_fi.txt +97 -0
  57. data/solr_sample_core/conf/lang/stopwords_fr.txt +186 -0
  58. data/solr_sample_core/conf/lang/stopwords_ga.txt +110 -0
  59. data/solr_sample_core/conf/lang/stopwords_gl.txt +161 -0
  60. data/solr_sample_core/conf/lang/stopwords_hi.txt +235 -0
  61. data/solr_sample_core/conf/lang/stopwords_hu.txt +211 -0
  62. data/solr_sample_core/conf/lang/stopwords_hy.txt +46 -0
  63. data/solr_sample_core/conf/lang/stopwords_id.txt +359 -0
  64. data/solr_sample_core/conf/lang/stopwords_it.txt +303 -0
  65. data/solr_sample_core/conf/lang/stopwords_ja.txt +127 -0
  66. data/solr_sample_core/conf/lang/stopwords_lv.txt +172 -0
  67. data/solr_sample_core/conf/lang/stopwords_nl.txt +119 -0
  68. data/solr_sample_core/conf/lang/stopwords_no.txt +194 -0
  69. data/solr_sample_core/conf/lang/stopwords_pt.txt +253 -0
  70. data/solr_sample_core/conf/lang/stopwords_ro.txt +233 -0
  71. data/solr_sample_core/conf/lang/stopwords_ru.txt +243 -0
  72. data/solr_sample_core/conf/lang/stopwords_sv.txt +133 -0
  73. data/solr_sample_core/conf/lang/stopwords_th.txt +119 -0
  74. data/solr_sample_core/conf/lang/stopwords_tr.txt +212 -0
  75. data/solr_sample_core/conf/lang/userdict_ja.txt +29 -0
  76. data/solr_sample_core/conf/mapping-FoldToASCII.txt +3813 -0
  77. data/solr_sample_core/conf/mapping-ISOLatin1Accent.txt +246 -0
  78. data/solr_sample_core/conf/protwords.txt +21 -0
  79. data/solr_sample_core/conf/schema.xml +62 -0
  80. data/solr_sample_core/conf/scripts.conf +24 -0
  81. data/solr_sample_core/conf/solrconfig.xml +1702 -0
  82. data/solr_sample_core/conf/spellings.txt +2 -0
  83. data/solr_sample_core/conf/stopwords.txt +14 -0
  84. data/solr_sample_core/conf/syn.txt +0 -0
  85. data/solr_sample_core/conf/synonyms.txt +29 -0
  86. data/solr_sample_core/conf/token_fixing_charfilter.txt +110 -0
  87. data/solr_sample_core/conf/update-script.js +53 -0
  88. data/solr_sample_core/conf/velocity/README.txt +101 -0
  89. data/solr_sample_core/conf/velocity/VM_global_library.vm +175 -0
  90. data/solr_sample_core/conf/velocity/browse.vm +33 -0
  91. data/solr_sample_core/conf/velocity/cluster.vm +19 -0
  92. data/solr_sample_core/conf/velocity/cluster_results.vm +31 -0
  93. data/solr_sample_core/conf/velocity/debug.vm +28 -0
  94. data/solr_sample_core/conf/velocity/did_you_mean.vm +9 -0
  95. data/solr_sample_core/conf/velocity/error.vm +11 -0
  96. data/solr_sample_core/conf/velocity/facet_fields.vm +23 -0
  97. data/solr_sample_core/conf/velocity/facet_pivot.vm +12 -0
  98. data/solr_sample_core/conf/velocity/facet_queries.vm +12 -0
  99. data/solr_sample_core/conf/velocity/facet_ranges.vm +23 -0
  100. data/solr_sample_core/conf/velocity/facets.vm +10 -0
  101. data/solr_sample_core/conf/velocity/footer.vm +43 -0
  102. data/solr_sample_core/conf/velocity/head.vm +35 -0
  103. data/solr_sample_core/conf/velocity/header.vm +7 -0
  104. data/solr_sample_core/conf/velocity/hit.vm +25 -0
  105. data/solr_sample_core/conf/velocity/hit_grouped.vm +43 -0
  106. data/solr_sample_core/conf/velocity/hit_plain.vm +25 -0
  107. data/solr_sample_core/conf/velocity/join_doc.vm +20 -0
  108. data/solr_sample_core/conf/velocity/jquery.autocomplete.css +48 -0
  109. data/solr_sample_core/conf/velocity/jquery.autocomplete.js +763 -0
  110. data/solr_sample_core/conf/velocity/layout.vm +24 -0
  111. data/solr_sample_core/conf/velocity/main.css +230 -0
  112. data/solr_sample_core/conf/velocity/mime_type_lists.vm +68 -0
  113. data/solr_sample_core/conf/velocity/pagination_bottom.vm +22 -0
  114. data/solr_sample_core/conf/velocity/pagination_top.vm +29 -0
  115. data/solr_sample_core/conf/velocity/product_doc.vm +32 -0
  116. data/solr_sample_core/conf/velocity/query.vm +42 -0
  117. data/solr_sample_core/conf/velocity/query_form.vm +64 -0
  118. data/solr_sample_core/conf/velocity/query_group.vm +43 -0
  119. data/solr_sample_core/conf/velocity/query_spatial.vm +75 -0
  120. data/solr_sample_core/conf/velocity/results_list.vm +22 -0
  121. data/solr_sample_core/conf/velocity/richtext_doc.vm +153 -0
  122. data/solr_sample_core/conf/velocity/suggest.vm +8 -0
  123. data/solr_sample_core/conf/velocity/tabs.vm +50 -0
  124. data/solr_sample_core/conf/xslt/example.xsl +132 -0
  125. data/solr_sample_core/conf/xslt/example_atom.xsl +67 -0
  126. data/solr_sample_core/conf/xslt/example_rss.xsl +66 -0
  127. data/solr_sample_core/conf/xslt/luke.xsl +337 -0
  128. data/solr_sample_core/conf/xslt/updateXml.xsl +70 -0
  129. data/spec/client_basics_spec.rb +26 -0
  130. data/spec/connect_spec.rb +25 -0
  131. data/spec/core_basics.rb +21 -0
  132. data/spec/index_spec.rb +31 -0
  133. data/spec/load_spec.rb +7 -0
  134. data/spec/minitest_helper.rb +36 -0
  135. data/spec/schema_spec.rb +113 -0
  136. metadata +284 -0
@@ -0,0 +1,24 @@
1
+ <!--
2
+ Licensed to the Apache Software Foundation (ASF) under one or more
3
+ contributor license agreements. See the NOTICE file distributed with
4
+ this work for additional information regarding copyright ownership.
5
+ The ASF licenses this file to You under the Apache License, Version 2.0
6
+ (the "License"); you may not use this file except in compliance with
7
+ the License. You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ -->
17
+
18
+ <!-- The content of this page will be statically included into the top-
19
+ right box of the cores overview page. Uncomment this as an example to
20
+ see where the content will show up.
21
+
22
+ <img src="img/ico/construction.png"> This line will appear at the top-
23
+ right box on collection1's Overview
24
+ -->
@@ -0,0 +1,25 @@
1
+ <!--
2
+ Licensed to the Apache Software Foundation (ASF) under one or more
3
+ contributor license agreements. See the NOTICE file distributed with
4
+ this work for additional information regarding copyright ownership.
5
+ The ASF licenses this file to You under the Apache License, Version 2.0
6
+ (the "License"); you may not use this file except in compliance with
7
+ the License. You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ -->
17
+
18
+ <!-- admin-extra.menu-bottom.html -->
19
+ <!--
20
+ <li>
21
+ <a href="#" style="background-image: url(img/ico/construction.png);">
22
+ LAST ITEM
23
+ </a>
24
+ </li>
25
+ -->
@@ -0,0 +1,25 @@
1
+ <!--
2
+ Licensed to the Apache Software Foundation (ASF) under one or more
3
+ contributor license agreements. See the NOTICE file distributed with
4
+ this work for additional information regarding copyright ownership.
5
+ The ASF licenses this file to You under the Apache License, Version 2.0
6
+ (the "License"); you may not use this file except in compliance with
7
+ the License. You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ -->
17
+
18
+ <!-- admin-extra.menu-top.html -->
19
+ <!--
20
+ <li>
21
+ <a href="#" style="background-image: url(img/ico/construction.png);">
22
+ FIRST ITEM
23
+ </a>
24
+ </li>
25
+ -->
@@ -0,0 +1,19 @@
1
+ <!--
2
+ Default configuration for the bisecting k-means clustering algorithm.
3
+
4
+ This file can be loaded (and saved) by Carrot2 Workbench.
5
+ http://project.carrot2.org/download.html
6
+ -->
7
+ <attribute-sets default="attributes">
8
+ <attribute-set id="attributes">
9
+ <value-set>
10
+ <label>attributes</label>
11
+ <attribute key="MultilingualClustering.defaultLanguage">
12
+ <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
13
+ </attribute>
14
+ <attribute key="MultilingualClustering.languageAggregationStrategy">
15
+ <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
16
+ </attribute>
17
+ </value-set>
18
+ </attribute-set>
19
+ </attribute-sets>
@@ -0,0 +1,24 @@
1
+ <!--
2
+ Default configuration for the Lingo clustering algorithm.
3
+
4
+ This file can be loaded (and saved) by Carrot2 Workbench.
5
+ http://project.carrot2.org/download.html
6
+ -->
7
+ <attribute-sets default="attributes">
8
+ <attribute-set id="attributes">
9
+ <value-set>
10
+ <label>attributes</label>
11
+ <!--
12
+ The language to assume for clustered documents.
13
+ For a list of allowed values, see:
14
+ http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
15
+ -->
16
+ <attribute key="MultilingualClustering.defaultLanguage">
17
+ <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
18
+ </attribute>
19
+ <attribute key="LingoClusteringAlgorithm.desiredClusterCountBase">
20
+ <value type="java.lang.Integer" value="20"/>
21
+ </attribute>
22
+ </value-set>
23
+ </attribute-set>
24
+ </attribute-sets>
@@ -0,0 +1,19 @@
1
+ <!--
2
+ Default configuration for the STC clustering algorithm.
3
+
4
+ This file can be loaded (and saved) by Carrot2 Workbench.
5
+ http://project.carrot2.org/download.html
6
+ -->
7
+ <attribute-sets default="attributes">
8
+ <attribute-set id="attributes">
9
+ <value-set>
10
+ <label>attributes</label>
11
+ <attribute key="MultilingualClustering.defaultLanguage">
12
+ <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
13
+ </attribute>
14
+ <attribute key="MultilingualClustering.languageAggregationStrategy">
15
+ <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
16
+ </attribute>
17
+ </value-set>
18
+ </attribute-set>
19
+ </attribute-sets>
@@ -0,0 +1,67 @@
1
+ <?xml version="1.0" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
20
+
21
+ <currencyConfig version="1.0">
22
+ <rates>
23
+ <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
24
+ <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
25
+ <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
26
+ <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
27
+ <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
28
+ <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
29
+ <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
30
+ <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
31
+ <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
32
+ <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
33
+ <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
34
+ <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
35
+ <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
36
+ <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
37
+ <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
38
+ <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
39
+ <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
40
+ <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
41
+ <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
42
+ <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
43
+ <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
44
+ <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
45
+ <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
46
+ <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
47
+ <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
48
+ <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
49
+ <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
50
+ <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
51
+ <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
52
+ <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
53
+ <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
54
+ <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
55
+ <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
56
+ <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
57
+ <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
58
+ <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
59
+ <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
60
+ <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
61
+
62
+ <!-- Cross-rates for some common currencies -->
63
+ <rate from="EUR" to="GBP" rate="0.869914" />
64
+ <rate from="EUR" to="NOK" rate="7.800095" />
65
+ <rate from="GBP" to="NOK" rate="8.966508" />
66
+ </rates>
67
+ </currencyConfig>
@@ -0,0 +1,38 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!-- If this file is found in the config directory, it will only be
20
+ loaded once at startup. If it is found in Solr's data
21
+ directory, it will be re-loaded every commit.
22
+
23
+ See http://wiki.apache.org/solr/QueryElevationComponent for more info
24
+
25
+ -->
26
+ <elevate>
27
+ <query text="foo bar">
28
+ <doc id="1" />
29
+ <doc id="2" />
30
+ <doc id="3" />
31
+ </query>
32
+
33
+ <query text="ipod">
34
+ <doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
35
+ <doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
36
+ </query>
37
+
38
+ </elevate>
@@ -0,0 +1,8 @@
1
+ # Set of Catalan contractions for ElisionFilter
2
+ # TODO: load this as a resource from the analyzer and sync it in build.xml
3
+ d
4
+ l
5
+ m
6
+ n
7
+ s
8
+ t
@@ -0,0 +1,15 @@
1
+ # Set of French contractions for ElisionFilter
2
+ # TODO: load this as a resource from the analyzer and sync it in build.xml
3
+ l
4
+ m
5
+ t
6
+ qu
7
+ n
8
+ s
9
+ j
10
+ d
11
+ c
12
+ jusqu
13
+ quoiqu
14
+ lorsqu
15
+ puisqu
@@ -0,0 +1,5 @@
1
+ # Set of Irish contractions for ElisionFilter
2
+ # TODO: load this as a resource from the analyzer and sync it in build.xml
3
+ d
4
+ m
5
+ b
@@ -0,0 +1,23 @@
1
+ # Set of Italian contractions for ElisionFilter
2
+ # TODO: load this as a resource from the analyzer and sync it in build.xml
3
+ c
4
+ l
5
+ all
6
+ dall
7
+ dell
8
+ nell
9
+ sull
10
+ coll
11
+ pell
12
+ gl
13
+ agl
14
+ dagl
15
+ degl
16
+ negl
17
+ sugl
18
+ un
19
+ m
20
+ t
21
+ s
22
+ v
23
+ d
@@ -0,0 +1,5 @@
1
+ # Set of Irish hyphenations for StopFilter
2
+ # TODO: load this as a resource from the analyzer and sync it in build.xml
3
+ h
4
+ n
5
+ t
@@ -0,0 +1,6 @@
1
+ # Set of overrides for the Dutch stemmer
2
+ # TODO: load this as a resource from the analyzer and sync it in build.xml
3
+ fiets fiets
4
+ bromfiets bromfiets
5
+ ei eier
6
+ kind kinder
@@ -0,0 +1,420 @@
1
+ #
2
+ # This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
3
+ #
4
+ # Any token with a part-of-speech tag that exactly matches those defined in this
5
+ # file is removed from the token stream.
6
+ #
7
+ # Set your own stoptags by uncommenting the lines below. Note that comments are
8
+ # not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
9
+ # etc. that can be useful for building your own stoptag set.
10
+ #
11
+ # The entire possible tagset is provided below for convenience.
12
+ #
13
+ #####
14
+ # noun: unclassified nouns
15
+ #名詞
16
+ #
17
+ # noun-common: Common nouns or nouns where the sub-classification is undefined
18
+ #名詞-一般
19
+ #
20
+ # noun-proper: Proper nouns where the sub-classification is undefined
21
+ #名詞-固有名詞
22
+ #
23
+ # noun-proper-misc: miscellaneous proper nouns
24
+ #名詞-固有名詞-一般
25
+ #
26
+ # noun-proper-person: Personal names where the sub-classification is undefined
27
+ #名詞-固有名詞-人名
28
+ #
29
+ # noun-proper-person-misc: names that cannot be divided into surname and
30
+ # given name; foreign names; names where the surname or given name is unknown.
31
+ # e.g. お市の方
32
+ #名詞-固有名詞-人名-一般
33
+ #
34
+ # noun-proper-person-surname: Mainly Japanese surnames.
35
+ # e.g. 山田
36
+ #名詞-固有名詞-人名-姓
37
+ #
38
+ # noun-proper-person-given_name: Mainly Japanese given names.
39
+ # e.g. 太郎
40
+ #名詞-固有名詞-人名-名
41
+ #
42
+ # noun-proper-organization: Names representing organizations.
43
+ # e.g. 通産省, NHK
44
+ #名詞-固有名詞-組織
45
+ #
46
+ # noun-proper-place: Place names where the sub-classification is undefined
47
+ #名詞-固有名詞-地域
48
+ #
49
+ # noun-proper-place-misc: Place names excluding countries.
50
+ # e.g. アジア, バルセロナ, 京都
51
+ #名詞-固有名詞-地域-一般
52
+ #
53
+ # noun-proper-place-country: Country names.
54
+ # e.g. 日本, オーストラリア
55
+ #名詞-固有名詞-地域-国
56
+ #
57
+ # noun-pronoun: Pronouns where the sub-classification is undefined
58
+ #名詞-代名詞
59
+ #
60
+ # noun-pronoun-misc: miscellaneous pronouns:
61
+ # e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
62
+ #名詞-代名詞-一般
63
+ #
64
+ # noun-pronoun-contraction: Spoken language contraction made by combining a
65
+ # pronoun and the particle 'wa'.
66
+ # e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
67
+ #名詞-代名詞-縮約
68
+ #
69
+ # noun-adverbial: Temporal nouns such as names of days or months that behave
70
+ # like adverbs. Nouns that represent amount or ratios and can be used adverbially,
71
+ # e.g. 金曜, 一月, 午後, 少量
72
+ #名詞-副詞可能
73
+ #
74
+ # noun-verbal: Nouns that take arguments with case and can appear followed by
75
+ # 'suru' and related verbs (する, できる, なさる, くださる)
76
+ # e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
77
+ #名詞-サ変接続
78
+ #
79
+ # noun-adjective-base: The base form of adjectives, words that appear before な ("na")
80
+ # e.g. 健康, 安易, 駄目, だめ
81
+ #名詞-形容動詞語幹
82
+ #
83
+ # noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
84
+ # e.g. 0, 1, 2, 何, 数, 幾
85
+ #名詞-数
86
+ #
87
+ # noun-affix: noun affixes where the sub-classification is undefined
88
+ #名詞-非自立
89
+ #
90
+ # noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
91
+ # attach to the base form of inflectional words, words that cannot be classified
92
+ # into any of the other categories below. This category includes indefinite nouns.
93
+ # e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
94
+ # 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
95
+ # 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
96
+ # わり, 割り, 割, ん-口語/, もん-口語/
97
+ #名詞-非自立-一般
98
+ #
99
+ # noun-affix-adverbial: noun affixes that can behave as adverbs.
100
+ # e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
101
+ # 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
102
+ # 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
103
+ # とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
104
+ # 儘, 侭, みぎり, 矢先
105
+ #名詞-非自立-副詞可能
106
+ #
107
+ # noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
108
+ # with the stem よう(だ) ("you(da)").
109
+ # e.g. よう, やう, 様 (よう)
110
+ #名詞-非自立-助動詞語幹
111
+ #
112
+ # noun-affix-adjective-base: noun affixes that can connect to the indeclinable
113
+ # connection form な (aux "da").
114
+ # e.g. みたい, ふう
115
+ #名詞-非自立-形容動詞語幹
116
+ #
117
+ # noun-special: special nouns where the sub-classification is undefined.
118
+ #名詞-特殊
119
+ #
120
+ # noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
121
+ # treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the base
122
+ # form of inflectional words.
123
+ # e.g. そう
124
+ #名詞-特殊-助動詞語幹
125
+ #
126
+ # noun-suffix: noun suffixes where the sub-classification is undefined.
127
+ #名詞-接尾
128
+ #
129
+ # noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
130
+ # to ガル or タイ and can combine into compound nouns, words that cannot be classified into
131
+ # any of the other categories below. In general, this category is more inclusive than
132
+ # 接尾語 ("suffix") and is usually the last element in a compound noun.
133
+ # e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
134
+ # よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
135
+ #名詞-接尾-一般
136
+ #
137
+ # noun-suffix-person: Suffixes that form nouns and attach to person names more often
138
+ # than other nouns.
139
+ # e.g. 君, 様, 著
140
+ #名詞-接尾-人名
141
+ #
142
+ # noun-suffix-place: Suffixes that form nouns and attach to place names more often
143
+ # than other nouns.
144
+ # e.g. 町, 市, 県
145
+ #名詞-接尾-地域
146
+ #
147
+ # noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
148
+ # can appear before スル ("suru").
149
+ # e.g. 化, 視, 分け, 入り, 落ち, 買い
150
+ #名詞-接尾-サ変接続
151
+ #
152
+ # noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
153
+ # is treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the
154
+ # conjunctive form of inflectional words.
155
+ # e.g. そう
156
+ #名詞-接尾-助動詞語幹
157
+ #
158
+ # noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
159
+ # form of inflectional words and appear before the copula だ ("da").
160
+ # e.g. 的, げ, がち
161
+ #名詞-接尾-形容動詞語幹
162
+ #
163
+ # noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
164
+ # e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
165
+ #名詞-接尾-副詞可能
166
+ #
167
+ # noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
168
+ # is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
169
+ # to numbers.
170
+ # e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
171
+ #名詞-接尾-助数詞
172
+ #
173
+ # noun-suffix-special: Special suffixes that mainly attach to inflecting words.
174
+ # e.g. (楽し) さ, (考え) 方
175
+ #名詞-接尾-特殊
176
+ #
177
+ # noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
178
+ # together.
179
+ # e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
180
+ #名詞-接続詞的
181
+ #
182
+ # noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
183
+ # semantically verb-like.
184
+ # e.g. ごらん, ご覧, 御覧, 頂戴
185
+ #名詞-動詞非自立的
186
+ #
187
+ # noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
188
+ # dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
189
+ # is いわく ("iwaku").
190
+ #名詞-引用文字列
191
+ #
192
+ # noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
193
+ # behave like an adjective.
194
+ # e.g. 申し訳, 仕方, とんでも, 違い
195
+ #名詞-ナイ形容詞語幹
196
+ #
197
+ #####
198
+ # prefix: unclassified prefixes
199
+ #接頭詞
200
+ #
201
+ # prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
202
+ # excluding numerical expressions.
203
+ # e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
204
+ #接頭詞-名詞接続
205
+ #
206
+ # prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
207
+ # in conjunctive form followed by なる/なさる/くださる.
208
+ # e.g. お (読みなさい), お (座り)
209
+ #接頭詞-動詞接続
210
+ #
211
+ # prefix-adjectival: Prefixes that attach to adjectives.
212
+ # e.g. お (寒いですねえ), バカ (でかい)
213
+ #接頭詞-形容詞接続
214
+ #
215
+ # prefix-numerical: Prefixes that attach to numerical expressions.
216
+ # e.g. 約, およそ, 毎時
217
+ #接頭詞-数接続
218
+ #
219
+ #####
220
+ # verb: unclassified verbs
221
+ #動詞
222
+ #
223
+ # verb-main:
224
+ #動詞-自立
225
+ #
226
+ # verb-auxiliary:
227
+ #動詞-非自立
228
+ #
229
+ # verb-suffix:
230
+ #動詞-接尾
231
+ #
232
+ #####
233
+ # adjective: unclassified adjectives
234
+ #形容詞
235
+ #
236
+ # adjective-main:
237
+ #形容詞-自立
238
+ #
239
+ # adjective-auxiliary:
240
+ #形容詞-非自立
241
+ #
242
+ # adjective-suffix:
243
+ #形容詞-接尾
244
+ #
245
+ #####
246
+ # adverb: unclassified adverbs
247
+ #副詞
248
+ #
249
+ # adverb-misc: Words that can be segmented into one unit and where adnominal
250
+ # modification is not possible.
251
+ # e.g. あいかわらず, 多分
252
+ #副詞-一般
253
+ #
254
+ # adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
255
+ # な, する, だ, etc.
256
+ # e.g. こんなに, そんなに, あんなに, なにか, なんでも
257
+ #副詞-助詞類接続
258
+ #
259
+ #####
260
+ # adnominal: Words that only have noun-modifying forms.
261
+ # e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
262
+ # どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
263
+ # 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
264
+ #連体詞
265
+ #
266
+ #####
267
+ # conjunction: Conjunctions that can occur independently.
268
+ # e.g. が, けれども, そして, じゃあ, それどころか
269
+ 接続詞
270
+ #
271
+ #####
272
+ # particle: unclassified particles.
273
+ 助詞
274
+ #
275
+ # particle-case: case particles where the subclassification is undefined.
276
+ 助詞-格助詞
277
+ #
278
+ # particle-case-misc: Case particles.
279
+ # e.g. から, が, で, と, に, へ, より, を, の, にて
280
+ 助詞-格助詞-一般
281
+ #
282
+ # particle-case-quote: the "to" that appears after nouns, a person’s speech,
283
+ # quotation marks, expressions of decisions from a meeting, reasons, judgements,
284
+ # conjectures, etc.
285
+ # e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
286
+ 助詞-格助詞-引用
287
+ #
288
+ # particle-case-compound: Compounds of particles and verbs that mainly behave
289
+ # like case particles.
290
+ # e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
291
+ # にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
292
+ # にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
293
+ # に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
294
+ # に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
295
+ # にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
296
+ # にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
297
+ # って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
298
+ 助詞-格助詞-連語
299
+ #
300
+ # particle-conjunctive:
301
+ # e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
302
+ # ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
303
+ # (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
304
+ 助詞-接続助詞
305
+ #
306
+ # particle-dependency:
307
+ # e.g. こそ, さえ, しか, すら, は, も, ぞ
308
+ 助詞-係助詞
309
+ #
310
+ # particle-adverbial:
311
+ # e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
312
+ # (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
313
+ # (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
314
+ # (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
315
+ # ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
316
+ 助詞-副助詞
317
+ #
318
+ # particle-interjective: particles with interjective grammatical roles.
319
+ # e.g. (松島) や
320
+ 助詞-間投助詞
321
+ #
322
+ # particle-coordinate:
323
+ # e.g. と, たり, だの, だり, とか, なり, や, やら
324
+ 助詞-並立助詞
325
+ #
326
+ # particle-final:
327
+ # e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
328
+ # ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
329
+ 助詞-終助詞
330
+ #
331
+ # particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
332
+ # adverbial, conjunctive, or sentence final. For example:
333
+ # (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
334
+ # (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
335
+ # 「(祈りが届いたせい) か (, 試験に合格した.)」
336
+ # (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
337
+ # e.g. か
338
+ 助詞-副助詞/並立助詞/終助詞
339
+ #
340
+ # particle-adnominalizer: The "no" that attaches to nouns and modifies
341
+ # non-inflectional words.
342
+ 助詞-連体化
343
+ #
344
+ # particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
345
+ # that are giongo, giseigo, or gitaigo.
346
+ # e.g. に, と
347
+ 助詞-副詞化
348
+ #
349
+ # particle-special: A particle that does not fit into one of the above classifications.
350
+ # This includes particles that are used in Tanka, Haiku, and other poetry.
351
+ # e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
352
+ 助詞-特殊
353
+ #
354
+ #####
355
+ # auxiliary-verb:
356
+ 助動詞
357
+ #
358
+ #####
359
+ # interjection: Greetings and other exclamations.
360
+ # e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
361
+ # いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
362
+ #感動詞
363
+ #
364
+ #####
365
+ # symbol: unclassified Symbols.
366
+ 記号
367
+ #
368
+ # symbol-misc: A general symbol not in one of the categories below.
369
+ # e.g. [○◎@$〒→+]
370
+ 記号-一般
371
+ #
372
+ # symbol-comma: Commas
373
+ # e.g. [,、]
374
+ 記号-読点
375
+ #
376
+ # symbol-period: Periods and full stops.
377
+ # e.g. [..。]
378
+ 記号-句点
379
+ #
380
+ # symbol-space: Full-width whitespace.
381
+ 記号-空白
382
+ #
383
+ # symbol-open_bracket:
384
+ # e.g. [({‘“『【]
385
+ 記号-括弧開
386
+ #
387
+ # symbol-close_bracket:
388
+ # e.g. [)}’”』」】]
389
+ 記号-括弧閉
390
+ #
391
+ # symbol-alphabetic:
392
+ #記号-アルファベット
393
+ #
394
+ #####
395
+ # other: unclassified other
396
+ #その他
397
+ #
398
+ # other-interjection: Words that are hard to classify as noun-suffixes or
399
+ # sentence-final particles.
400
+ # e.g. (だ)ァ
401
+ その他-間投
402
+ #
403
+ #####
404
+ # filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
405
+ # e.g. あの, うんと, えと
406
+ フィラー
407
+ #
408
+ #####
409
+ # non-verbal: non-verbal sound.
410
+ 非言語音
411
+ #
412
+ #####
413
+ # fragment:
414
+ #語断片
415
+ #
416
+ #####
417
+ # unknown: unknown part of speech.
418
+ #未知語
419
+ #
420
+ ##### End of file