muck-raker 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README.rdoc +24 -0
  3. data/Rakefile +76 -0
  4. data/VERSION +1 -0
  5. data/app/controllers/admin/recommender/feeds_controller.rb +174 -0
  6. data/app/controllers/recommender/default_controller.rb +15 -0
  7. data/app/controllers/recommender/entries_controller.rb +79 -0
  8. data/app/controllers/recommender/recommendations_controller.rb +46 -0
  9. data/app/controllers/recommender/search_controller.rb +82 -0
  10. data/app/helpers/muck_raker_helper.rb +29 -0
  11. data/app/models/click.rb +2 -0
  12. data/app/models/cloud_cache.rb +9 -0
  13. data/app/models/entry.rb +274 -0
  14. data/app/models/feed.rb +29 -0
  15. data/app/models/language.rb +2 -0
  16. data/app/models/oai_endpoint.rb +2 -0
  17. data/app/models/recommendation.rb +2 -0
  18. data/app/models/service.rb +2 -0
  19. data/app/models/subject.rb +2 -0
  20. data/app/models/user.rb +36 -0
  21. data/app/views/default/_advertisements.html.erb +30 -0
  22. data/app/views/default/_language_list.html.erb +5 -0
  23. data/app/views/default/index.html.erb +3 -0
  24. data/app/views/default/tour.html.erb +15 -0
  25. data/app/views/default/widgets.html.erb +4 -0
  26. data/app/views/documents/.tmp_not_found.html +0 -0
  27. data/app/views/entries/_cloud_type_selector.html.erb +9 -0
  28. data/app/views/entries/_result.html.erb +10 -0
  29. data/app/views/entries/_tag_cloud.html.erb +4 -0
  30. data/app/views/entries/_tags.html.erb +7 -0
  31. data/app/views/entries/browse_by_tags.html.erb +33 -0
  32. data/app/views/entries/collections.html.erb +28 -0
  33. data/app/views/entries/details.html.erb +39 -0
  34. data/app/views/entries/index.html.erb +8 -0
  35. data/app/views/entries/search.html.erb +21 -0
  36. data/app/views/entries/show.html.erb +19 -0
  37. data/app/views/entries/track_clicks.html.erb +0 -0
  38. data/app/views/feeds/_feed_selection.html.erb +1 -0
  39. data/app/views/feeds/edit.html.erb +107 -0
  40. data/app/views/feeds/harvest_now.html.erb +3 -0
  41. data/app/views/feeds/index.html.erb +95 -0
  42. data/app/views/feeds/new.html.erb +109 -0
  43. data/app/views/feeds/unban.html.erb +3 -0
  44. data/app/views/layouts/_flashes.html.erb +5 -0
  45. data/app/views/layouts/default.html.erb +35 -0
  46. data/app/views/layouts/default_old.html.erb +43 -0
  47. data/app/views/layouts/global/_header.html.erb +16 -0
  48. data/app/views/layouts/global/_search_box.html.erb +13 -0
  49. data/app/views/layouts/recommendations.html.erb +17 -0
  50. data/app/views/recommendations/document_not_found.html.erb +13 -0
  51. data/app/views/recommendations/index.pjs.erb +44 -0
  52. data/app/views/recommendations/index.rss.builder +37 -0
  53. data/app/views/recommendations/index.xml.builder +34 -0
  54. data/app/views/recommendations/index_js.pjs.erb +35 -0
  55. data/app/views/search/_languages.html.erb +6 -0
  56. data/app/views/search/_result.html.erb +9 -0
  57. data/app/views/search/_search_box.html.erb +9 -0
  58. data/app/views/search/index.html.erb +5 -0
  59. data/app/views/search/results.atom.builder +27 -0
  60. data/app/views/search/results.html.erb +35 -0
  61. data/app/views/search/results.pjs.erb +13 -0
  62. data/app/views/search/results.rdf.builder +24 -0
  63. data/app/views/search/results.rss.builder +31 -0
  64. data/app/views/search/results.xml.builder +19 -0
  65. data/config/muck_raker_routes.rb +39 -0
  66. data/config/solr/README.txt +0 -0
  67. data/config/solr/cores/de/conf/protwords.txt +21 -0
  68. data/config/solr/cores/de/conf/schema.xml +81 -0
  69. data/config/solr/cores/de/conf/solrconfig.xml +304 -0
  70. data/config/solr/cores/de/conf/stopwords.txt +341 -0
  71. data/config/solr/cores/de/conf/synonyms.txt +31 -0
  72. data/config/solr/cores/en/conf/protwords.txt +21 -0
  73. data/config/solr/cores/en/conf/schema.xml +83 -0
  74. data/config/solr/cores/en/conf/solrconfig.xml +304 -0
  75. data/config/solr/cores/en/conf/stopwords.txt +341 -0
  76. data/config/solr/cores/en/conf/synonyms.txt +31 -0
  77. data/config/solr/cores/es/conf/protwords.txt +21 -0
  78. data/config/solr/cores/es/conf/schema.xml +81 -0
  79. data/config/solr/cores/es/conf/solrconfig.xml +304 -0
  80. data/config/solr/cores/es/conf/stopwords.txt +206 -0
  81. data/config/solr/cores/es/conf/synonyms.txt +31 -0
  82. data/config/solr/cores/fr/conf/protwords.txt +21 -0
  83. data/config/solr/cores/fr/conf/schema.xml +82 -0
  84. data/config/solr/cores/fr/conf/solrconfig.xml +304 -0
  85. data/config/solr/cores/fr/conf/stopwords.txt +57 -0
  86. data/config/solr/cores/fr/conf/synonyms.txt +31 -0
  87. data/config/solr/cores/ja/conf/protwords.txt +21 -0
  88. data/config/solr/cores/ja/conf/schema.xml +69 -0
  89. data/config/solr/cores/ja/conf/solrconfig.xml +304 -0
  90. data/config/solr/cores/ja/conf/stopwords.txt +341 -0
  91. data/config/solr/cores/ja/conf/synonyms.txt +31 -0
  92. data/config/solr/cores/nl/conf/protwords.txt +21 -0
  93. data/config/solr/cores/nl/conf/schema.xml +78 -0
  94. data/config/solr/cores/nl/conf/solrconfig.xml +304 -0
  95. data/config/solr/cores/nl/conf/stopwords.txt +45691 -0
  96. data/config/solr/cores/nl/conf/synonyms.txt +31 -0
  97. data/config/solr/cores/ru/conf/protwords.txt +21 -0
  98. data/config/solr/cores/ru/conf/schema.xml +78 -0
  99. data/config/solr/cores/ru/conf/solrconfig.xml +304 -0
  100. data/config/solr/cores/ru/conf/stopwords.txt +341 -0
  101. data/config/solr/cores/ru/conf/synonyms.txt +31 -0
  102. data/config/solr/cores/zh/conf/protwords.txt +21 -0
  103. data/config/solr/cores/zh/conf/schema.xml +72 -0
  104. data/config/solr/cores/zh/conf/solrconfig.xml +304 -0
  105. data/config/solr/cores/zh/conf/stopwords.txt +341 -0
  106. data/config/solr/cores/zh/conf/synonyms.txt +31 -0
  107. data/config/solr/solr.xml +50 -0
  108. data/db/bootstrap/feeds.yml +4268 -0
  109. data/db/bootstrap/oai_endpoints.yml +6 -0
  110. data/db/bootstrap/services.yml +172 -0
  111. data/db/migrate/20090602191243_create_muck_raker.rb +304 -0
  112. data/install.rb +1 -0
  113. data/lib/muck_raker/initialize_routes.rb +8 -0
  114. data/lib/muck_raker/languages.rb +53 -0
  115. data/lib/muck_raker.rb +6 -0
  116. data/locales/en.yml +42 -0
  117. data/rails/init.rb +27 -0
  118. data/raker/lib/aggregatord.jar +0 -0
  119. data/raker/lib/aggregatord.properties +45 -0
  120. data/raker/lib/apache-solr-common-1.3.0.jar +0 -0
  121. data/raker/lib/apache-solr-core-1.3.0.jar +0 -0
  122. data/raker/lib/apache-solr-dataimporthandler-1.3.0.jar +0 -0
  123. data/raker/lib/axis.jar +0 -0
  124. data/raker/lib/commons-codec-1.3.jar +0 -0
  125. data/raker/lib/commons-csv-1.0-SNAPSHOT-r609327.jar +0 -0
  126. data/raker/lib/commons-daemon.jar +0 -0
  127. data/raker/lib/commons-dbcp-1.2.1.jar +0 -0
  128. data/raker/lib/commons-discovery-0.2.jar +0 -0
  129. data/raker/lib/commons-fileupload-1.2.jar +0 -0
  130. data/raker/lib/commons-httpclient-3.1.jar +0 -0
  131. data/raker/lib/commons-io-1.3.1.jar +0 -0
  132. data/raker/lib/commons-logging-1.0.4.jar +0 -0
  133. data/raker/lib/commons-pool-1.3.jar +0 -0
  134. data/raker/lib/delicious-1.13.jar +0 -0
  135. data/raker/lib/flickrapi-1.0a9.jar +0 -0
  136. data/raker/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  137. data/raker/lib/javax.jms.jar +0 -0
  138. data/raker/lib/jaxrpc.jar +0 -0
  139. data/raker/lib/jdom.jar +0 -0
  140. data/raker/lib/jericho.jar +0 -0
  141. data/raker/lib/junit-4.3.jar +0 -0
  142. data/raker/lib/log4j-1.2.8.jar +0 -0
  143. data/raker/lib/lucene-analyzers-2.4-dev.jar +0 -0
  144. data/raker/lib/lucene-core-2.4-dev.jar +0 -0
  145. data/raker/lib/lucene-highlighter-2.4-dev.jar +0 -0
  146. data/raker/lib/lucene-memory-2.4-dev.jar +0 -0
  147. data/raker/lib/lucene-queries-2.4-dev.jar +0 -0
  148. data/raker/lib/lucene-snowball-2.4-dev.jar +0 -0
  149. data/raker/lib/lucene-spellchecker-2.4-dev.jar +0 -0
  150. data/raker/lib/mail.jar +0 -0
  151. data/raker/lib/mysql-connector-java-5.1.7-bin.jar +0 -0
  152. data/raker/lib/recommenderd.jar +0 -0
  153. data/raker/lib/recommenderd.properties +64 -0
  154. data/raker/lib/rome-1.0.jar +0 -0
  155. data/raker/lib/rome.properties +90 -0
  156. data/raker/lib/solr-commons-csv-pom.xml.template +36 -0
  157. data/raker/lib/solr-lucene-analyzers-pom.xml.template +35 -0
  158. data/raker/lib/solr-lucene-contrib-pom.xml.template +48 -0
  159. data/raker/lib/solr-lucene-core-pom.xml.template +36 -0
  160. data/raker/lib/solr-lucene-highlighter-pom.xml.template +38 -0
  161. data/raker/lib/solr-lucene-queries-pom.xml.template +38 -0
  162. data/raker/lib/solr-lucene-snowball-pom.xml.template +36 -0
  163. data/raker/lib/solr-lucene-spellchecker-pom.xml.template +36 -0
  164. data/raker/lib/stax-utils.jar +0 -0
  165. data/raker/lib/wsdl4j-1.5.1.jar +0 -0
  166. data/raker/lib/wstx-asl-3.2.7.jar +0 -0
  167. data/tasks/bootstrap.rake +18 -0
  168. data/tasks/raker.rake +68 -0
  169. data/test/recommender_test.rb +8 -0
  170. data/test/test_helper.rb +20 -0
  171. data/uninstall.rb +1 -0
  172. metadata +253 -0
@@ -0,0 +1,5 @@
1
+ <h1>Search Open Educational Resources</h1><%# Search form; submits to the search controller's results action. %>
2
+ <% form_tag :controller => 'search', :action => 'results' do %>
3
+ <%= text_field_tag :form_terms, params[:form_terms], :size => 30 %>
4
+ <%= submit_tag " Search! " %>
5
+ <% end %>
@@ -0,0 +1,27 @@
1
+ headers["Content-Type"] = "application/atom+xml"
2
+ xml.instruct!
3
+
4
+ xml.feed "xmlns" => "http://www.w3.org/2005/Atom" do
5
+
6
+ xml.title "Open Educational Resources"
7
+ xml.link "rel" => "self", "href" => request.env["REQUEST_URI"]
8
+ xml.id url_for(:only_path => false, :controller => 'results')
9
+ xml.updated CGI.rfc1123_date Time.now
10
+ xml.author { xml.name "Open Educational Resources" }
11
+
12
+ xml.image do
13
+ xml.title 'Open Educational Resources logo'
14
+ xml.url 'http://www.oerrecommender.org/images/diagram.gif'
15
+ xml.link 'http://www.oerrecommender.org'
16
+ xml.description 'Open Educational Resources'
17
+ end
18
+
19
+ @results.each do |result|
20
+ xml.entry do
21
+ xml.title result.title
22
+ xml.link "rel" => "alternate", "href" => result.uri
23
+ xml.id result.uri
24
+ end
25
+ end
26
+
27
+ end
@@ -0,0 +1,35 @@
1
+ <h1><%= t(:title) %> - <%= t(:search)%></h1>
2
+ <%# Search results page: result list, previous/next paging links, and links to the RSS, XML, Atom and RDF renderings. %>
3
+ <% if flash[:notice] %><p style="color: green"><%= flash[:notice] %></p><% end %>
4
+
5
+ <%= render(:partial => "search_box") %>
6
+
7
+ <% if @results.length > 0%>
8
+
9
+ <hr class="divider"/>
10
+ <%# The search terms come from the request, so they are HTML-escaped before being echoed back. %>
11
+ <p id='results-description'><%= t(:results) %> <b><%= @offset %> - <%= (@offset + @limit) < @hit_count ? (@offset + @limit) : @hit_count %></b> <%= t(:of) %> <b><%= @hit_count %></b> <%= t(:for) %> <b><%= h(@terms) %></b></p>
12
+
13
+ <div id="results">
14
+ <%= render(:partial => "result", :collection => @results) %>
15
+ </div>
16
+
17
+ <div>
18
+ <b><%= (link_to(t(:previous), {:action => "results", :terms => URI.unescape(@term_list), :locale => @language, :limit => @limit, :offset => @offset - @limit}, :rel => "nofollow") + "&nbsp;&nbsp;") if @offset > 0 %>
19
+ <%= (link_to(t(:next), {:action => "results", :terms => URI.unescape(@term_list), :locale => @language,:limit => @limit, :offset => @offset + @limit}, :rel => "nofollow") if @results.size >= @limit )%>
20
+ </b>
21
+ </div>
22
+
23
+ <p>
24
+ <a rel="nofollow" href="<%=@current_uri%>.rss<%=@url_terms%>" title="RSS for current page"><%= t(:rss) %></a>
25
+ <a rel="nofollow" href="<%=@current_uri%>.rss<%=@url_terms%>" title="RSS for current page"><img src="/images/rss.gif" alt="RSS" title="RSS" border="0"></a>
26
+ <a rel="nofollow" href="<%=@current_uri%>.xml<%=@url_terms%>" title="XML for current page"><%= t(:xml) %></a>
27
+ <a rel="nofollow" href="<%=@current_uri%>.xml<%=@url_terms%>" title="XML for current page"><img src="/images/rss.gif" alt="XML" title="XML" border="0"></a>
28
+ <a rel="nofollow" href="<%=@current_uri%>.atom<%=@url_terms%>" title="Atom for current page"><%= t(:atom) %></a>
29
+ <a rel="nofollow" href="<%=@current_uri%>.atom<%=@url_terms%>" title="Atom for current page"><img src="/images/rss.gif" alt="Atom" title="Atom" border="0"></a>
30
+ <a rel="nofollow" href="<%=@current_uri%>.rdf<%=@url_terms%>" title="RDF for current page"><%= t(:rdf) %></a>
31
+ <a rel="nofollow" href="<%=@current_uri%>.rdf<%=@url_terms%>" title="RDF for current page"><img src="/images/rdf.gif" alt="RDF" title="RDF" border="0"></a>
32
+ </p>
33
+ <% else %>
34
+ <p><%= t(:no_hits) %></p>
35
+ <% end %>
@@ -0,0 +1,13 @@
1
+ <%if !@json_results.nil? %>
2
+ var recs = <%= @json_results %>;
3
+
4
+ document.write('<div class="oer_recommender_container">');
5
+ document.write('<div class="oer_recommender_list">');
6
+ for(nRec = 0; nRec < recs.length; nRec++) {
7
+ r = recs[nRec];
8
+ document.write('<p class="oer_recommender_item"><a class="oer_recommender_hit_link" href="' + (r.direct_link == null ? r.permalink : r.direct_link) + '">' + r.title + ' (' + r.collection + ')</a></p>');
9
+ document.write('<p class="oer_recommender_item_description">' + (r.description.length > 200 ? r.description.substring(0,200) + '...' : r.description) + '</a></p>');
10
+ }
11
+ document.write('</div>');
12
+ document.write('</div>');
13
+ <% end %>
@@ -0,0 +1,24 @@
1
+ headers["Content-Type"] = "application/rdf+xml"
2
+ xml.instruct!
3
+
4
+ xml.RDF :RDF, "xmlns:RDF" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "xmlns:result" => "http://www.oerrecommender.org/rdf#" do
5
+
6
+ xml.RDF :Description, "RDF:about"=>"http://oerrecommender.org/result" do
7
+ xml.result :name do
8
+ xml.text! 'Results for ' + html_escape(request.env["REQUEST_URI"])
9
+ end
10
+ end
11
+
12
+ @results.each do |result|
13
+ xml.RDF :Description, "RDF:about" => result.uri do
14
+ xml.result :title do xml.text! result.title end
15
+ xml.result :uri do xml.text! result.uri end
16
+ end
17
+ end
18
+
19
+ xml.RDF :Seq, "RDF:about" => url_for(:only_path => false, :controller => 'search') do
20
+ @results.each do |result|
21
+ xml.RDF :li, "RDF:resource" => result.uri
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,31 @@
1
+ headers["Content-Type"] = "application/rss+xml"
2
+ xml.instruct! :xml, :version=>"1.0"
3
+ xml.rss "version" => "2.0", "xmlns:dc" => "http://purl.org/dc/elements/1.1/" do
4
+ xml.channel do
5
+
6
+ xml.title 'OER Recommender - Search Results for: ' + URI.unescape(params[:terms])
7
+ xml.link url_for(request.env["REQUEST_URI"])
8
+ xml.pubDate CGI.rfc1123_date Time.now
9
+ xml.description 'OER Recommender - Search Results for: ' + URI.unescape(params[:terms])
10
+ xml.generator 'OER Recommender'
11
+
12
+ xml.image do
13
+ xml.title 'Open Educational Resources logo'
14
+ xml.url 'http://www.oerrecommender.org/images/diagram.gif'
15
+ xml.link 'http://www.oerrecommender.org'
16
+ xml.description 'Open Educational Resources'
17
+ end
18
+
19
+ @results.each do |result|
20
+ xml.item do
21
+ xml.title result.title
22
+ xml.link result.uri
23
+ xml.guid result.uri
24
+ xml.pubDate result.published_at
25
+ xml.description truncate_words(result.description)
26
+ end
27
+ end
28
+
29
+ end
30
+ end
31
+
@@ -0,0 +1,19 @@
1
+ headers["Content-Type"] = "application/xml"
2
+ xml.instruct!
3
+
4
+ xml.results(:search => URI.unescape(params[:terms]), :hits => @hit_count, :offset => @offset, :limit => @limit)do
5
+ @results.each do |result|
6
+ xml.result(:published_at => result.published_at, :relevance => result.solr_score) do
7
+ xml.id result.id
8
+ xml.title result.title
9
+ xml.description truncate_words(result.description)
10
+ xml.uri result.uri
11
+ xml.direct_link result.direct_link
12
+ xml.collection do
13
+ xml.title result.feed.title
14
+ xml.short_title result.feed.short_title
15
+ xml.uri result.feed.uri
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,39 @@
1
+ ActionController::Routing::Routes.draw do |map|
2
+
3
+ # admin
4
+ map.namespace :admin do |a|
5
+ a.resources :feeds, :controller => 'recommender/feeds', :member => { :harvest_now => :post, :ban => :post, :unban => :post }
6
+ end
7
+
8
+ map.connect '/feed_list', :controller => 'recommender/feeds', :action => 'selection_list'
9
+ map.connect '/widgets', :controller => 'recommender/default', :action => 'widgets'
10
+ map.connect '/tour', :controller => 'recommender/default', :action => 'tour'
11
+
12
+ map.resources :entries, :controller => 'recommender/entries'
13
+ map.connect 'r', :controller => 'recommender/entries', :action => 'track_clicks'
14
+ map.connect 'entries/tags/*tags', :controller => 'recommender/entries', :action => 'browse_by_tags'
15
+ map.connect 'entries/search/*terms', :controller => 'recommender/entries', :action => 'search'
16
+ map.connect 'collections', :controller => 'entries', :action => 'collections'
17
+
18
+ map.resources :recommendations, :controller => 'recommender/recommendations'
19
+
20
+ # redirect (and hit tracking)
21
+
22
+ # search
23
+ map.connect 'search/relations.:format/*terms', :controller => 'search', :action => 'relations'
24
+ map.connect 'search/relations/*terms', :controller => 'search', :action => 'relations'
25
+ map.connect 'search/source_uri.:format', :controller => 'search', :action => 'source_uri'
26
+ map.connect 'search/source_uri', :controller => 'search', :action => 'source_uri'
27
+ map.connect 'search/destination_uri.:format', :controller => 'search', :action => 'destination_uri'
28
+ map.connect 'search/destination_uri', :controller => 'search', :action => 'destination_uri'
29
+ map.connect 'search/uris.:format', :controller => 'search', :action => 'uris'
30
+ map.connect 'search/uris', :controller => 'search', :action => 'uris'
31
+ map.connect 'search/results.:format', :controller => 'search', :action => 'results'
32
+ map.connect 'search/results', :controller => 'search', :action => 'results'
33
+
34
+ # You can have the root of your site routed by hooking up ''
35
+ # -- just remember to delete public/index.html.
36
+ map.connect '/', :controller => 'default', :action => 'index'
37
+ map.connect '/frames', :controller => 'documents', :action => 'frames'
38
+
39
+ end
File without changes
@@ -0,0 +1,21 @@
1
+ # The ASF licenses this file to You under the Apache License, Version 2.0
2
+ # (the "License"); you may not use this file except in compliance with
3
+ # the License. You may obtain a copy of the License at
4
+ #
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ #
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ #-----------------------------------------------------------------------
14
+ # Use a protected word file to protect against the stemmer reducing two
15
+ # unrelated words to the same base word.
16
+
17
+ # Some non-words that normally won't be encountered,
18
+ # just to test that they won't be stemmed.
19
+ dontstems
20
+ zwhacky
21
+
@@ -0,0 +1,81 @@
1
+ <?xml version="1.0" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <schema name="de" version="1.1">
20
+ <types>
21
+ <fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
22
+ <fieldType name="integer" class="solr.IntField" omitNorms="false"/>
23
+ <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
24
+ <analyzer type="index">
25
+ <tokenizer class="solr.HTMLStripWhitespaceTokenizerFactory"/>
26
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
27
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
28
+ <filter class="solr.LowerCaseFilterFactory"/>
29
+ <filter class="solr.GermanStemFilterFactory" protected="protwords.txt"/>
30
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
31
+ </analyzer>
32
+ <analyzer type="query">
33
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
34
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
35
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
36
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
37
+ <filter class="solr.LowerCaseFilterFactory"/>
38
+ <filter class="solr.GermanStemFilterFactory" protected="protwords.txt"/>
39
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
40
+ </analyzer>
41
+ </fieldType>
42
+ <fieldType name="tag" class="solr.TextField" positionIncrementGap="100">
43
+ <analyzer type="index">
44
+ <tokenizer class="solr.HTMLStripWhitespaceTokenizerFactory"/>
45
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
46
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
47
+ <filter class="solr.LowerCaseFilterFactory"/>
48
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
49
+ </analyzer>
50
+ </fieldType>
51
+ </types>
52
+
53
+
54
+ <fields>
55
+ <!-- general -->
56
+ <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
57
+
58
+ <field name="pk_i" type="integer" indexed="true" stored="true"/>
59
+ <field name="pk_s" type="string" indexed="true" stored="true"/>
60
+ <field name="text" type="text" indexed="true" stored="true" termVectors="true" multiValued="true"/>
61
+ <field name="tag" type="tag" indexed="true" stored="true" termVectors="true" multiValued="true"/>
62
+ <dynamicField name="*_i" type="integer" indexed="true" stored="false"/>
63
+ <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
64
+ <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
65
+ <dynamicField name="*_facet" type="string" indexed="true" stored="false"/>
66
+ <dynamicField name="*_s_mv" type="string" indexed="true" stored="false" multiValued="true"/>
67
+ </fields>
68
+
69
+ <!-- field to use to determine and enforce document uniqueness. -->
70
+ <uniqueKey>id</uniqueKey>
71
+
72
+ <copyField source="*_t" dest="text"/>
73
+ <copyField source="*_facet" dest="text"/>
74
+
75
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
76
+ <defaultSearchField>text</defaultSearchField>
77
+
78
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
79
+ <solrQueryParser defaultOperator="OR"/>
80
+ </schema>
81
+
@@ -0,0 +1,304 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <config>
20
+ <!-- Set this to 'false' if you want solr to continue working after it has
21
+ encountered a severe configuration error. In a production environment,
22
+ you may want solr to keep working even if one handler is mis-configured.
23
+
24
+ You may also set this to false by setting the system property:
25
+ -Dsolr.abortOnConfigurationError=false
26
+ -->
27
+ <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
28
+
29
+ <!-- Used to specify an alternate directory to hold all index data
30
+ other than the default ./data under the Solr home.
31
+ If replication is in use, this should match the replication configuration. -->
32
+ <dataDir>${solr.data.dir:../../../../../solr_indexes}/de</dataDir>
33
+
34
+ <!-- Values here affect all index writers and act as a default unless overridden. -->
35
+ <indexDefaults>
36
+ <useCompoundFile>false</useCompoundFile>
37
+ <mergeFactor>10</mergeFactor>
38
+ <maxBufferedDocs>1000</maxBufferedDocs>
39
+ <maxMergeDocs>2147483647</maxMergeDocs>
40
+ <maxFieldLength>10000</maxFieldLength>
41
+ <writeLockTimeout>1000</writeLockTimeout>
42
+ <commitLockTimeout>10000</commitLockTimeout>
43
+ </indexDefaults>
44
+ <!-- options specific to the main on-disk lucene index -->
45
+ <!--
46
+ <mainIndex>
47
+ <useCompoundFile>false</useCompoundFile>
48
+ <mergeFactor>10</mergeFactor>
49
+ <maxBufferedDocs>1000</maxBufferedDocs>
50
+ <maxMergeDocs>2147483647</maxMergeDocs>
51
+ <maxFieldLength>10000</maxFieldLength>
52
+
53
+ <unlockOnStartup>false</unlockOnStartup>
54
+ </mainIndex>
55
+ -->
56
+ <!-- If true, unlock any held write or commit locks on startup.
57
+ This defeats the locking mechanism that allows multiple
58
+ processes to safely access a lucene index, and should be
59
+ used with care. -->
60
+
61
+ <!-- the default high-performance update handler -->
62
+
63
+ <updateHandler class="solr.DirectUpdateHandler2">
64
+
65
+ <!-- A prefix of "solr." for class names is an alias that
66
+ causes solr to search appropriate packages, including
67
+ org.apache.solr.(search|update|request|core|analysis)
68
+ -->
69
+
70
+ <!-- autocommit pending docs if certain criteria are met
71
+ <autoCommit>
72
+ <maxDocs>10000</maxDocs>
73
+ <maxTime>1000</maxTime>
74
+ </autoCommit>
75
+ -->
76
+
77
+ <!-- The RunExecutableListener executes an external command.
78
+ exe - the name of the executable to run
79
+ dir - dir to use as the current working directory. default="."
80
+ wait - the calling thread waits until the executable returns. default="true"
81
+ args - the arguments to pass to the program. default=nothing
82
+ env - environment variables to set. default=nothing
83
+ -->
84
+ <!-- A postCommit event is fired after every commit or optimize command
85
+ <listener event="postCommit" class="solr.RunExecutableListener">
86
+ <str name="exe">snapshooter</str>
87
+ <str name="dir">solr/bin</str>
88
+ <bool name="wait">true</bool>
89
+ <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
90
+ <arr name="env"> <str>MYVAR=val1</str> </arr>
91
+ </listener>
92
+ -->
93
+ <!-- A postOptimize event is fired only after every optimize command, useful
94
+ in conjunction with index distribution to only distribute optimized indices
95
+ <listener event="postOptimize" class="solr.RunExecutableListener">
96
+ <str name="exe">snapshooter</str>
97
+ <str name="dir">solr/bin</str>
98
+ <bool name="wait">true</bool>
99
+ </listener>
100
+ -->
101
+
102
+ </updateHandler>
103
+
104
+
105
+ <query>
106
+ <!-- Maximum number of clauses in a boolean query... can affect
107
+ range or prefix queries that expand to big boolean
108
+ queries. An exception is thrown if exceeded. -->
109
+ <maxBooleanClauses>1024</maxBooleanClauses>
110
+
111
+
112
+ <!-- Cache used by SolrIndexSearcher for filters (DocSets),
113
+ unordered sets of *all* documents that match a query.
114
+ When a new searcher is opened, its caches may be prepopulated
115
+ or "autowarmed" using data from caches in the old searcher.
116
+ autowarmCount is the number of items to prepopulate. For LRUCache,
117
+ the autowarmed items will be the most recently accessed items.
118
+ Parameters:
119
+ class - the SolrCache implementation (currently only LRUCache)
120
+ size - the maximum number of entries in the cache
121
+ initialSize - the initial capacity (number of entries) of
122
+ the cache. (see java.util.HashMap)
123
+ autowarmCount - the number of entries to prepopulate from
124
+ an old cache.
125
+ -->
126
+ <filterCache
127
+ class="solr.LRUCache"
128
+ size="512"
129
+ initialSize="512"
130
+ autowarmCount="256"/>
131
+
132
+ <!-- queryResultCache caches results of searches - ordered lists of
133
+ document ids (DocList) based on a query, a sort, and the range
134
+ of documents requested. -->
135
+ <queryResultCache
136
+ class="solr.LRUCache"
137
+ size="512"
138
+ initialSize="512"
139
+ autowarmCount="256"/>
140
+
141
+ <!-- documentCache caches Lucene Document objects (the stored fields for each document).
142
+ Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
143
+ <documentCache
144
+ class="solr.LRUCache"
145
+ size="512"
146
+ initialSize="512"
147
+ autowarmCount="0"/>
148
+
149
+ <!-- If true, stored fields that are not requested will be loaded lazily.
150
+
151
+ This can result in a significant speed improvement if the usual case is to
152
+ not load all stored fields, especially if the skipped fields are large compressed
153
+ text fields.
154
+ -->
155
+ <enableLazyFieldLoading>true</enableLazyFieldLoading>
156
+
157
+ <!-- Example of a generic cache. These caches may be accessed by name
158
+ through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
159
+ The purpose is to enable easy caching of user/application level data.
160
+ The regenerator argument should be specified as an implementation
161
+ of solr.search.CacheRegenerator if autowarming is desired. -->
162
+ <!--
163
+ <cache name="myUserCache"
164
+ class="solr.LRUCache"
165
+ size="4096"
166
+ initialSize="1024"
167
+ autowarmCount="1024"
168
+ regenerator="org.mycompany.mypackage.MyRegenerator"
169
+ />
170
+ -->
171
+
172
+ <!-- An optimization that attempts to use a filter to satisfy a search.
173
+ If the requested sort does not include score, then the filterCache
174
+ will be checked for a filter matching the query. If found, the filter
175
+ will be used as the source of document ids, and then the sort will be
176
+ applied to that.
177
+ <useFilterForSortedQuery>true</useFilterForSortedQuery>
178
+ -->
179
+
180
+ <!-- An optimization for use with the queryResultCache. When a search
181
+ is requested, a superset of the requested number of document ids
182
+ are collected. For example, if a search for a particular query
183
+ requests matching documents 10 through 19, and queryResultWindowSize is 50,
184
+ then documents 0 through 49 will be collected and cached. Any further
185
+ requests in that range can be satisfied via the cache. -->
186
+ <queryResultWindowSize>10</queryResultWindowSize>
187
+
188
+ <!-- This entry enables an int hash representation for filters (DocSets)
189
+ when the number of items in the set is less than maxSize. For smaller
190
+ sets, this representation is more memory efficient, more efficient to
191
+ iterate over, and faster to take intersections. -->
192
+ <HashDocSet maxSize="3000" loadFactor="0.75"/>
193
+
194
+
195
+ <!-- boolToFilterOptimizer converts boolean clauses with zero boost
196
+ into cached filters if the number of docs selected by the clause exceeds
197
+ the threshold (represented as a fraction of the total index) -->
198
+ <boolTofilterOptimizer enabled="true" cacheSize="32" threshold=".05"/>
199
+
200
+
201
+ <!-- a newSearcher event is fired whenever a new searcher is being prepared
202
+ and there is a current searcher handling requests (aka registered). -->
203
+ <!-- QuerySenderListener takes an array of NamedList and executes a
204
+ local query request for each NamedList in sequence. -->
205
+ <!--
206
+ <listener event="newSearcher" class="solr.QuerySenderListener">
207
+ <arr name="queries">
208
+ <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
209
+ <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
210
+ </arr>
211
+ </listener>
212
+ -->
213
+
214
+ <!-- a firstSearcher event is fired whenever a new searcher is being
215
+ prepared but there is no current registered searcher to handle
216
+ requests or to gain autowarming data from. -->
217
+ <!--
218
+ <listener event="firstSearcher" class="solr.QuerySenderListener">
219
+ <arr name="queries">
220
+ <lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
221
+ </arr>
222
+ </listener>
223
+ -->
224
+
225
+ <!-- If a search request comes in and there is no current registered searcher,
226
+ then immediately register the still warming searcher and use it. If
227
+ "false" then all requests will block until the first searcher is done
228
+ warming. -->
229
+ <useColdSearcher>false</useColdSearcher>
230
+
231
+ <!-- Maximum number of searchers that may be warming in the background
232
+ concurrently. An error is returned if this limit is exceeded. Recommend
233
+ 1-2 for read-only slaves, higher for masters w/o cache warming. -->
234
+ <maxWarmingSearchers>4</maxWarmingSearchers>
235
+
236
+ </query>
237
+
238
+ <!--
239
+ Let the dispatch filter handle /select?qt=XXX
240
+ handleSelect=true will use consistent error handling for /select and /update
241
+ handleSelect=false will use solr1.1 style error formatting
242
+ -->
243
+ <requestDispatcher handleSelect="true" >
244
+ <!--Make sure your system has some authentication before enabling remote streaming! -->
245
+ <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
246
+ </requestDispatcher>
247
+
248
+
249
+ <!-- requestHandler plugins... incoming queries will be dispatched to the
250
+ correct handler based on the qt (query type) param matching the
251
+ name of registered handlers.
252
+ The "standard" request handler is the default and will be used if qt
253
+ is not specified in the request.
254
+ -->
255
+ <requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
256
+ <!-- default values for query parameters -->
257
+ <lst name="defaults">
258
+ <str name="echoParams">explicit</str>
259
+ <str name="json.nl">map</str>
260
+ <!--
261
+ <int name="rows">10</int>
262
+ <str name="fl">*</str>
263
+ <str name="version">2.1</str>
264
+ -->
265
+ </lst>
266
+ </requestHandler>
267
+
268
+ <!-- Note how you can register the same handler multiple times with
269
+ different names (and different init parameters)
270
+ -->
271
+ <!-- Update request handler.
272
+
273
+ Note: Since solr1.1 requestHandlers require a valid content type header if posted in
274
+ the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
275
+ The response format differs from solr1.1 formatting and returns a standard error code.
276
+
277
+ To enable solr1.1 behavior, remove the /update handler or change its path
278
+ <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
279
+ -->
280
+
281
+ <!-- queryResponseWriter plugins... query responses will be written using the
282
+ writer specified by the 'wt' request parameter matching the name of a registered
283
+ writer.
284
+ The "standard" writer is the default and will be used if 'wt' is not specified
285
+ in the request. XMLResponseWriter will be used if nothing is specified here.
286
+ The json, python, and ruby writers are also available by default.
287
+
288
+ <queryResponseWriter name="standard" class="org.apache.solr.request.XMLResponseWriter"/>
289
+ <queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/>
290
+ <queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/>
291
+ <queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/>
292
+
293
+ <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/>
294
+ -->
295
+
296
+ <!-- XSLT response writer transforms the XML output by any xslt file found
297
+ in Solr's conf/xslt directory. Changes to xslt files are checked for
298
+ every xsltCacheLifetimeSeconds.
299
+ -->
300
+ <queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter">
301
+ <int name="xsltCacheLifetimeSeconds">5</int>
302
+ </queryResponseWriter>
303
+
304
+ </config>