muck-raker 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README.rdoc +24 -0
- data/Rakefile +76 -0
- data/VERSION +1 -0
- data/app/controllers/admin/recommender/feeds_controller.rb +174 -0
- data/app/controllers/recommender/default_controller.rb +15 -0
- data/app/controllers/recommender/entries_controller.rb +79 -0
- data/app/controllers/recommender/recommendations_controller.rb +46 -0
- data/app/controllers/recommender/search_controller.rb +82 -0
- data/app/helpers/muck_raker_helper.rb +29 -0
- data/app/models/click.rb +2 -0
- data/app/models/cloud_cache.rb +9 -0
- data/app/models/entry.rb +274 -0
- data/app/models/feed.rb +29 -0
- data/app/models/language.rb +2 -0
- data/app/models/oai_endpoint.rb +2 -0
- data/app/models/recommendation.rb +2 -0
- data/app/models/service.rb +2 -0
- data/app/models/subject.rb +2 -0
- data/app/models/user.rb +36 -0
- data/app/views/default/_advertisements.html.erb +30 -0
- data/app/views/default/_language_list.html.erb +5 -0
- data/app/views/default/index.html.erb +3 -0
- data/app/views/default/tour.html.erb +15 -0
- data/app/views/default/widgets.html.erb +4 -0
- data/app/views/documents/.tmp_not_found.html +0 -0
- data/app/views/entries/_cloud_type_selector.html.erb +9 -0
- data/app/views/entries/_result.html.erb +10 -0
- data/app/views/entries/_tag_cloud.html.erb +4 -0
- data/app/views/entries/_tags.html.erb +7 -0
- data/app/views/entries/browse_by_tags.html.erb +33 -0
- data/app/views/entries/collections.html.erb +28 -0
- data/app/views/entries/details.html.erb +39 -0
- data/app/views/entries/index.html.erb +8 -0
- data/app/views/entries/search.html.erb +21 -0
- data/app/views/entries/show.html.erb +19 -0
- data/app/views/entries/track_clicks.html.erb +0 -0
- data/app/views/feeds/_feed_selection.html.erb +1 -0
- data/app/views/feeds/edit.html.erb +107 -0
- data/app/views/feeds/harvest_now.html.erb +3 -0
- data/app/views/feeds/index.html.erb +95 -0
- data/app/views/feeds/new.html.erb +109 -0
- data/app/views/feeds/unban.html.erb +3 -0
- data/app/views/layouts/_flashes.html.erb +5 -0
- data/app/views/layouts/default.html.erb +35 -0
- data/app/views/layouts/default_old.html.erb +43 -0
- data/app/views/layouts/global/_header.html.erb +16 -0
- data/app/views/layouts/global/_search_box.html.erb +13 -0
- data/app/views/layouts/recommendations.html.erb +17 -0
- data/app/views/recommendations/document_not_found.html.erb +13 -0
- data/app/views/recommendations/index.pjs.erb +44 -0
- data/app/views/recommendations/index.rss.builder +37 -0
- data/app/views/recommendations/index.xml.builder +34 -0
- data/app/views/recommendations/index_js.pjs.erb +35 -0
- data/app/views/search/_languages.html.erb +6 -0
- data/app/views/search/_result.html.erb +9 -0
- data/app/views/search/_search_box.html.erb +9 -0
- data/app/views/search/index.html.erb +5 -0
- data/app/views/search/results.atom.builder +27 -0
- data/app/views/search/results.html.erb +35 -0
- data/app/views/search/results.pjs.erb +13 -0
- data/app/views/search/results.rdf.builder +24 -0
- data/app/views/search/results.rss.builder +31 -0
- data/app/views/search/results.xml.builder +19 -0
- data/config/muck_raker_routes.rb +39 -0
- data/config/solr/README.txt +0 -0
- data/config/solr/cores/de/conf/protwords.txt +21 -0
- data/config/solr/cores/de/conf/schema.xml +81 -0
- data/config/solr/cores/de/conf/solrconfig.xml +304 -0
- data/config/solr/cores/de/conf/stopwords.txt +341 -0
- data/config/solr/cores/de/conf/synonyms.txt +31 -0
- data/config/solr/cores/en/conf/protwords.txt +21 -0
- data/config/solr/cores/en/conf/schema.xml +83 -0
- data/config/solr/cores/en/conf/solrconfig.xml +304 -0
- data/config/solr/cores/en/conf/stopwords.txt +341 -0
- data/config/solr/cores/en/conf/synonyms.txt +31 -0
- data/config/solr/cores/es/conf/protwords.txt +21 -0
- data/config/solr/cores/es/conf/schema.xml +81 -0
- data/config/solr/cores/es/conf/solrconfig.xml +304 -0
- data/config/solr/cores/es/conf/stopwords.txt +206 -0
- data/config/solr/cores/es/conf/synonyms.txt +31 -0
- data/config/solr/cores/fr/conf/protwords.txt +21 -0
- data/config/solr/cores/fr/conf/schema.xml +82 -0
- data/config/solr/cores/fr/conf/solrconfig.xml +304 -0
- data/config/solr/cores/fr/conf/stopwords.txt +57 -0
- data/config/solr/cores/fr/conf/synonyms.txt +31 -0
- data/config/solr/cores/ja/conf/protwords.txt +21 -0
- data/config/solr/cores/ja/conf/schema.xml +69 -0
- data/config/solr/cores/ja/conf/solrconfig.xml +304 -0
- data/config/solr/cores/ja/conf/stopwords.txt +341 -0
- data/config/solr/cores/ja/conf/synonyms.txt +31 -0
- data/config/solr/cores/nl/conf/protwords.txt +21 -0
- data/config/solr/cores/nl/conf/schema.xml +78 -0
- data/config/solr/cores/nl/conf/solrconfig.xml +304 -0
- data/config/solr/cores/nl/conf/stopwords.txt +45691 -0
- data/config/solr/cores/nl/conf/synonyms.txt +31 -0
- data/config/solr/cores/ru/conf/protwords.txt +21 -0
- data/config/solr/cores/ru/conf/schema.xml +78 -0
- data/config/solr/cores/ru/conf/solrconfig.xml +304 -0
- data/config/solr/cores/ru/conf/stopwords.txt +341 -0
- data/config/solr/cores/ru/conf/synonyms.txt +31 -0
- data/config/solr/cores/zh/conf/protwords.txt +21 -0
- data/config/solr/cores/zh/conf/schema.xml +72 -0
- data/config/solr/cores/zh/conf/solrconfig.xml +304 -0
- data/config/solr/cores/zh/conf/stopwords.txt +341 -0
- data/config/solr/cores/zh/conf/synonyms.txt +31 -0
- data/config/solr/solr.xml +50 -0
- data/db/bootstrap/feeds.yml +4268 -0
- data/db/bootstrap/oai_endpoints.yml +6 -0
- data/db/bootstrap/services.yml +172 -0
- data/db/migrate/20090602191243_create_muck_raker.rb +304 -0
- data/install.rb +1 -0
- data/lib/muck_raker/initialize_routes.rb +8 -0
- data/lib/muck_raker/languages.rb +53 -0
- data/lib/muck_raker.rb +6 -0
- data/locales/en.yml +42 -0
- data/rails/init.rb +27 -0
- data/raker/lib/aggregatord.jar +0 -0
- data/raker/lib/aggregatord.properties +45 -0
- data/raker/lib/apache-solr-common-1.3.0.jar +0 -0
- data/raker/lib/apache-solr-core-1.3.0.jar +0 -0
- data/raker/lib/apache-solr-dataimporthandler-1.3.0.jar +0 -0
- data/raker/lib/axis.jar +0 -0
- data/raker/lib/commons-codec-1.3.jar +0 -0
- data/raker/lib/commons-csv-1.0-SNAPSHOT-r609327.jar +0 -0
- data/raker/lib/commons-daemon.jar +0 -0
- data/raker/lib/commons-dbcp-1.2.1.jar +0 -0
- data/raker/lib/commons-discovery-0.2.jar +0 -0
- data/raker/lib/commons-fileupload-1.2.jar +0 -0
- data/raker/lib/commons-httpclient-3.1.jar +0 -0
- data/raker/lib/commons-io-1.3.1.jar +0 -0
- data/raker/lib/commons-logging-1.0.4.jar +0 -0
- data/raker/lib/commons-pool-1.3.jar +0 -0
- data/raker/lib/delicious-1.13.jar +0 -0
- data/raker/lib/flickrapi-1.0a9.jar +0 -0
- data/raker/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/raker/lib/javax.jms.jar +0 -0
- data/raker/lib/jaxrpc.jar +0 -0
- data/raker/lib/jdom.jar +0 -0
- data/raker/lib/jericho.jar +0 -0
- data/raker/lib/junit-4.3.jar +0 -0
- data/raker/lib/log4j-1.2.8.jar +0 -0
- data/raker/lib/lucene-analyzers-2.4-dev.jar +0 -0
- data/raker/lib/lucene-core-2.4-dev.jar +0 -0
- data/raker/lib/lucene-highlighter-2.4-dev.jar +0 -0
- data/raker/lib/lucene-memory-2.4-dev.jar +0 -0
- data/raker/lib/lucene-queries-2.4-dev.jar +0 -0
- data/raker/lib/lucene-snowball-2.4-dev.jar +0 -0
- data/raker/lib/lucene-spellchecker-2.4-dev.jar +0 -0
- data/raker/lib/mail.jar +0 -0
- data/raker/lib/mysql-connector-java-5.1.7-bin.jar +0 -0
- data/raker/lib/recommenderd.jar +0 -0
- data/raker/lib/recommenderd.properties +64 -0
- data/raker/lib/rome-1.0.jar +0 -0
- data/raker/lib/rome.properties +90 -0
- data/raker/lib/solr-commons-csv-pom.xml.template +36 -0
- data/raker/lib/solr-lucene-analyzers-pom.xml.template +35 -0
- data/raker/lib/solr-lucene-contrib-pom.xml.template +48 -0
- data/raker/lib/solr-lucene-core-pom.xml.template +36 -0
- data/raker/lib/solr-lucene-highlighter-pom.xml.template +38 -0
- data/raker/lib/solr-lucene-queries-pom.xml.template +38 -0
- data/raker/lib/solr-lucene-snowball-pom.xml.template +36 -0
- data/raker/lib/solr-lucene-spellchecker-pom.xml.template +36 -0
- data/raker/lib/stax-utils.jar +0 -0
- data/raker/lib/wsdl4j-1.5.1.jar +0 -0
- data/raker/lib/wstx-asl-3.2.7.jar +0 -0
- data/tasks/bootstrap.rake +18 -0
- data/tasks/raker.rake +68 -0
- data/test/recommender_test.rb +8 -0
- data/test/test_helper.rb +20 -0
- data/uninstall.rb +1 -0
- metadata +253 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
headers["Content-Type"] = "application/atom+xml"
|
2
|
+
xml.instruct!
|
3
|
+
|
4
|
+
xml.feed "xmlns" => "http://www.w3.org/2005/Atom" do
|
5
|
+
|
6
|
+
xml.title "Open Educational Resources"
|
7
|
+
xml.link "rel" => "self", "href" => request.env["REQUEST_URI"]
|
8
|
+
xml.id url_for(:only_path => false, :controller => 'results')
|
9
|
+
# Atom (RFC 4287) requires date constructs to be RFC 3339 timestamps; RFC 1123 dates make the feed invalid.
xml.updated Time.now.utc.xmlschema
|
10
|
+
xml.author { xml.name "Open Educational Resources" }
|
11
|
+
|
12
|
+
xml.image do
|
13
|
+
xml.title 'Open Educational Resources logo'
|
14
|
+
xml.url 'http://www.oerrecommender.org/images/diagram.gif'
|
15
|
+
xml.link 'http://www.oerrecommender.org'
|
16
|
+
xml.description 'Open Educational Resources'
|
17
|
+
end
|
18
|
+
|
19
|
+
@results.each do |result|
|
20
|
+
xml.entry do
|
21
|
+
xml.title result.title
|
22
|
+
xml.link "rel" => "alternate", "href" => result.uri
|
23
|
+
xml.id result.uri
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
<h1><%= t(:title) %> - <%= t(:search)%></h1>
|
2
|
+
|
3
|
+
<% if flash[:notice] %><p style="color: green"><%= flash[:notice] %></p><% end %>
|
4
|
+
|
5
|
+
<%= render(:partial => "search_box") %>
|
6
|
+
|
7
|
+
<% if @results.length > 0%>
|
8
|
+
|
9
|
+
<hr class="divider"/>
|
10
|
+
|
11
|
+
<p id='results-description'><%= t(:results) %> <b><%= @offset %> - <%= (@offset + @limit) < @hit_count ? (@offset + @limit) : @hit_count %></b> <%= t(:of) %> <b><%= @hit_count %></b> <%= t(:for) %> <b><%= @terms %></b></p>
|
12
|
+
|
13
|
+
<div id="results">
|
14
|
+
<%= render(:partial => "result", :collection => @results) %>
|
15
|
+
</div>
|
16
|
+
|
17
|
+
<div>
|
18
|
+
<b><%= (link_to(t(:previous), {:action => "results", :terms => URI.unescape(@term_list), :locale => @language, :limit => @limit, :offset => @offset - @limit}, :rel => "nofollow") + " ") if @offset > 0 %>
|
19
|
+
<%= (link_to(t(:next), {:action => "results", :terms => URI.unescape(@term_list), :locale => @language,:limit => @limit, :offset => @offset + @limit}, :rel => "nofollow") if @results.size >= @limit )%>
|
20
|
+
</b>
|
21
|
+
</div>
|
22
|
+
|
23
|
+
<p>
|
24
|
+
<a rel="nofollow" href="<%=@current_uri%>.rss<%=@url_terms%>" title="RSS for current page"><%= t(:rss) %></a>
|
25
|
+
<a rel="nofollow" href="<%=@current_uri%>.rss<%=@url_terms%>" title="RSS for current page"><img src="/images/rss.gif" alt="RSS" title="RSS" border="0"></a>
|
26
|
+
<a rel="nofollow" href="<%=@current_uri%>.xml<%=@url_terms%>" title="XML for current page"><%= t(:xml) %></a>
|
27
|
+
<a rel="nofollow" href="<%=@current_uri%>.xml<%=@url_terms%>" title="XML for current page"><img src="/images/rss.gif" alt="XML" title="XML" border="0"></a>
|
28
|
+
<a rel="nofollow" href="<%=@current_uri%>.atom<%=@url_terms%>" title="Atom for current page"><%= t(:atom) %></a>
|
29
|
+
<a rel="nofollow" href="<%=@current_uri%>.atom<%=@url_terms%>" title="Atom for current page"><img src="/images/rss.gif" alt="Atom" title="Atom" border="0"></a>
|
30
|
+
<a rel="nofollow" href="<%=@current_uri%>.rdf<%=@url_terms%>" title="RDF for current page"><%= t(:rdf) %></a>
|
31
|
+
<a rel="nofollow" href="<%=@current_uri%>.rdf<%=@url_terms%>" title="RDF for current page"><img src="/images/rdf.gif" alt="RDF" title="RDF" border="0"></a>
|
32
|
+
</p>
|
33
|
+
<% else %>
|
34
|
+
<p><%= t(:no_hits) %></p>
|
35
|
+
<% end %>
|
@@ -0,0 +1,13 @@
|
|
1
|
+
<%if !@json_results.nil? %>
|
2
|
+
var recs = <%= @json_results %>;
|
3
|
+
|
4
|
+
document.write('<div class="oer_recommender_container">');
|
5
|
+
document.write('<div class="oer_recommender_list">');
|
6
|
+
for(nRec = 0; nRec < recs.length; nRec++) {
|
7
|
+
r = recs[nRec];
|
8
|
+
document.write('<p class="oer_recommender_item"><a class="oer_recommender_hit_link" href="' + (r.direct_link == null ? r.permalink : r.direct_link) + '">' + r.title + ' (' + r.collection + ')</a></p>');
|
9
|
+
// The description paragraph opens no anchor, so it must not close one; a missing description renders as empty text instead of throwing.
document.write('<p class="oer_recommender_item_description">' + (r.description == null ? '' : (r.description.length > 200 ? r.description.substring(0,200) + '...' : r.description)) + '</p>');
|
10
|
+
}
|
11
|
+
document.write('</div>');
|
12
|
+
document.write('</div>');
|
13
|
+
<% end %>
|
@@ -0,0 +1,24 @@
|
|
1
|
+
headers["Content-Type"] = "application/rdf+xml"
|
2
|
+
xml.instruct!
|
3
|
+
|
4
|
+
xml.RDF :RDF, "xmlns:RDF" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "xmlns:result" => "http://www.oerrecommender.org/rdf#" do
|
5
|
+
|
6
|
+
xml.RDF :Description, "RDF:about"=>"http://oerrecommender.org/result" do
|
7
|
+
xml.result :name do
|
8
|
+
# Builder's text! already XML-escapes its argument; pre-escaping with html_escape double-encoded the URI (& became &amp;amp;).
xml.text! 'Results for ' + request.env["REQUEST_URI"].to_s
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
@results.each do |result|
|
13
|
+
xml.RDF :Description, "RDF:about" => result.uri do
|
14
|
+
xml.result :title do xml.text! result.title end
|
15
|
+
xml.result :uri do xml.text! result.uri end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
xml.RDF :Seq, "RDF:about" => url_for(:only_path => false, :controller => 'search') do
|
20
|
+
@results.each do |result|
|
21
|
+
xml.RDF :li, "RDF:resource" => result.uri
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
headers["Content-Type"] = "application/rss+xml"
|
2
|
+
xml.instruct! :xml, :version=>"1.0"
|
3
|
+
xml.rss "version" => "2.0", "xmlns:dc" => "http://purl.org/dc/elements/1.1/" do
|
4
|
+
xml.channel do
|
5
|
+
|
6
|
+
xml.title 'OER Recommender - Search Results for: ' + URI.unescape(params[:terms])
|
7
|
+
xml.link url_for(request.env["REQUEST_URI"])
|
8
|
+
xml.pubDate CGI.rfc1123_date Time.now
|
9
|
+
xml.description 'OER Recommender - Search Results for: ' + URI.unescape(params[:terms])
|
10
|
+
xml.generator 'OER Recommender'
|
11
|
+
|
12
|
+
xml.image do
|
13
|
+
xml.title 'Open Educational Resources logo'
|
14
|
+
xml.url 'http://www.oerrecommender.org/images/diagram.gif'
|
15
|
+
xml.link 'http://www.oerrecommender.org'
|
16
|
+
xml.description 'Open Educational Resources'
|
17
|
+
end
|
18
|
+
|
19
|
+
@results.each do |result|
|
20
|
+
xml.item do
|
21
|
+
xml.title result.title
|
22
|
+
xml.link result.uri
|
23
|
+
xml.guid result.uri
|
24
|
+
xml.pubDate result.published_at
|
25
|
+
xml.description truncate_words(result.description)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
@@ -0,0 +1,19 @@
|
|
1
|
+
headers["Content-Type"] = "application/xml"
|
2
|
+
xml.instruct!
|
3
|
+
|
4
|
+
xml.results(:search => URI.unescape(params[:terms]), :hits => @hit_count, :offset => @offset, :limit => @limit)do
|
5
|
+
@results.each do |result|
|
6
|
+
xml.result(:published_at => result.published_at, :relevance => result.solr_score) do
|
7
|
+
xml.id result.id
|
8
|
+
xml.title result.title
|
9
|
+
xml.description truncate_words(result.description)
|
10
|
+
xml.uri result.uri
|
11
|
+
xml.direct_link result.direct_link
|
12
|
+
xml.collection do
|
13
|
+
xml.title result.feed.title
|
14
|
+
xml.short_title result.feed.short_title
|
15
|
+
xml.uri result.feed.uri
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
ActionController::Routing::Routes.draw do |map|
|
2
|
+
|
3
|
+
# admin
|
4
|
+
map.namespace :admin do |a|
|
5
|
+
a.resources :feeds, :controller => 'recommender/feeds', :member => { :harvest_now => :post, :ban => :post, :unban => :post }
|
6
|
+
end
|
7
|
+
|
8
|
+
map.connect '/feed_list', :controller => 'recommender/feeds', :action => 'selection_list'
|
9
|
+
map.connect '/widgets', :controller => 'recommender/default', :action => 'widgets'
|
10
|
+
map.connect '/tour', :controller => 'recommender/default', :action => 'tour'
|
11
|
+
|
12
|
+
map.resources :entries, :controller => 'recommender/entries'
|
13
|
+
map.connect 'r', :controller => 'recommender/entries', :action => 'track_clicks'
|
14
|
+
map.connect 'entries/tags/*tags', :controller => 'recommender/entries', :action => 'browse_by_tags'
|
15
|
+
map.connect 'entries/search/*terms', :controller => 'recommender/entries', :action => 'search'
|
16
|
+
map.connect 'collections', :controller => 'entries', :action => 'collections'
|
17
|
+
|
18
|
+
map.resources :recommendations, :controller => 'recommender/recommendations'
|
19
|
+
|
20
|
+
# redirect (and hit tracking)
|
21
|
+
|
22
|
+
# search
|
23
|
+
map.connect 'search/relations.:format/*terms', :controller => 'search', :action => 'relations'
|
24
|
+
map.connect 'search/relations/*terms', :controller => 'search', :action => 'relations'
|
25
|
+
map.connect 'search/source_uri.:format', :controller => 'search', :action => 'source_uri'
|
26
|
+
map.connect 'search/source_uri', :controller => 'search', :action => 'source_uri'
|
27
|
+
map.connect 'search/destination_uri.:format', :controller => 'search', :action => 'destination_uri'
|
28
|
+
map.connect 'search/destination_uri', :controller => 'search', :action => 'destination_uri'
|
29
|
+
map.connect 'search/uris.:format', :controller => 'search', :action => 'uris'
|
30
|
+
map.connect 'search/uris', :controller => 'search', :action => 'uris'
|
31
|
+
map.connect 'search/results.:format', :controller => 'search', :action => 'results'
|
32
|
+
map.connect 'search/results', :controller => 'search', :action => 'results'
|
33
|
+
|
34
|
+
# You can have the root of your site routed by hooking up ''
|
35
|
+
# -- just remember to delete public/index.html.
|
36
|
+
map.connect '/', :controller => 'default', :action => 'index'
|
37
|
+
map.connect '/frames', :controller => 'documents', :action => 'frames'
|
38
|
+
|
39
|
+
end
|
File without changes
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# The ASF licenses this file to You under the Apache License, Version 2.0
|
2
|
+
# (the "License"); you may not use this file except in compliance with
|
3
|
+
# the License. You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
#-----------------------------------------------------------------------
|
14
|
+
# Use a protected word file to protect against the stemmer reducing two
|
15
|
+
# unrelated words to the same base word.
|
16
|
+
|
17
|
+
# Some non-words that normally won't be encountered,
|
18
|
+
# just to test that they won't be stemmed.
|
19
|
+
dontstems
|
20
|
+
zwhacky
|
21
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<!--
|
3
|
+
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
+
contributor license agreements. See the NOTICE file distributed with
|
5
|
+
this work for additional information regarding copyright ownership.
|
6
|
+
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
+
(the "License"); you may not use this file except in compliance with
|
8
|
+
the License. You may obtain a copy of the License at
|
9
|
+
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
See the License for the specific language governing permissions and
|
16
|
+
limitations under the License.
|
17
|
+
-->
|
18
|
+
|
19
|
+
<schema name="de" version="1.1">
|
20
|
+
<types>
|
21
|
+
<fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
22
|
+
<fieldType name="integer" class="solr.IntField" omitNorms="false"/>
|
23
|
+
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
24
|
+
<analyzer type="index">
|
25
|
+
<tokenizer class="solr.HTMLStripWhitespaceTokenizerFactory"/>
|
26
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
27
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
28
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
29
|
+
<filter class="solr.GermanStemFilterFactory" protected="protwords.txt"/>
|
30
|
+
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
31
|
+
</analyzer>
|
32
|
+
<analyzer type="query">
|
33
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
34
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
35
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
36
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
37
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
38
|
+
<filter class="solr.GermanStemFilterFactory" protected="protwords.txt"/>
|
39
|
+
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
40
|
+
</analyzer>
|
41
|
+
</fieldType>
|
42
|
+
<fieldType name="tag" class="solr.TextField" positionIncrementGap="100">
|
43
|
+
<analyzer type="index">
|
44
|
+
<tokenizer class="solr.HTMLStripWhitespaceTokenizerFactory"/>
|
45
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
46
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
47
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
48
|
+
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
49
|
+
</analyzer>
|
50
|
+
</fieldType>
|
51
|
+
</types>
|
52
|
+
|
53
|
+
|
54
|
+
<fields>
|
55
|
+
<!-- general -->
|
56
|
+
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
|
57
|
+
|
58
|
+
<field name="pk_i" type="integer" indexed="true" stored="true"/>
|
59
|
+
<field name="pk_s" type="string" indexed="true" stored="true"/>
|
60
|
+
<field name="text" type="text" indexed="true" stored="true" termVectors="true" multiValued="true"/>
|
61
|
+
<field name="tag" type="tag" indexed="true" stored="true" termVectors="true" multiValued="true"/>
|
62
|
+
<dynamicField name="*_i" type="integer" indexed="true" stored="false"/>
|
63
|
+
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
|
64
|
+
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
65
|
+
<dynamicField name="*_facet" type="string" indexed="true" stored="false"/>
|
66
|
+
<dynamicField name="*_s_mv" type="string" indexed="true" stored="false" multiValued="true"/>
|
67
|
+
</fields>
|
68
|
+
|
69
|
+
<!-- field to use to determine and enforce document uniqueness. -->
|
70
|
+
<uniqueKey>id</uniqueKey>
|
71
|
+
|
72
|
+
<copyField source="*_t" dest="text"/>
|
73
|
+
<copyField source="*_facet" dest="text"/>
|
74
|
+
|
75
|
+
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
76
|
+
<defaultSearchField>text</defaultSearchField>
|
77
|
+
|
78
|
+
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
79
|
+
<solrQueryParser defaultOperator="OR"/>
|
80
|
+
</schema>
|
81
|
+
|
@@ -0,0 +1,304 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" ?>
|
2
|
+
<!--
|
3
|
+
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
+
contributor license agreements. See the NOTICE file distributed with
|
5
|
+
this work for additional information regarding copyright ownership.
|
6
|
+
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
+
(the "License"); you may not use this file except in compliance with
|
8
|
+
the License. You may obtain a copy of the License at
|
9
|
+
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
See the License for the specific language governing permissions and
|
16
|
+
limitations under the License.
|
17
|
+
-->
|
18
|
+
|
19
|
+
<config>
|
20
|
+
<!-- Set this to 'false' if you want solr to continue working after it has
|
21
|
+
encountered a severe configuration error. In a production environment,
|
22
|
+
you may want solr to keep working even if one handler is mis-configured.
|
23
|
+
|
24
|
+
You may also set this to false by setting the system property:
|
25
|
+
-Dsolr.abortOnConfigurationError=false
|
26
|
+
-->
|
27
|
+
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
|
28
|
+
|
29
|
+
<!-- Used to specify an alternate directory to hold all index data
|
30
|
+
other than the default ./data under the Solr home.
|
31
|
+
If replication is in use, this should match the replication configuration. -->
|
32
|
+
<dataDir>${solr.data.dir:../../../../../solr_indexes}/de</dataDir>
|
33
|
+
|
34
|
+
<!-- Values here affect all index writers and act as a default unless overridden. -->
|
35
|
+
<indexDefaults>
|
36
|
+
<useCompoundFile>false</useCompoundFile>
|
37
|
+
<mergeFactor>10</mergeFactor>
|
38
|
+
<maxBufferedDocs>1000</maxBufferedDocs>
|
39
|
+
<maxMergeDocs>2147483647</maxMergeDocs>
|
40
|
+
<maxFieldLength>10000</maxFieldLength>
|
41
|
+
<writeLockTimeout>1000</writeLockTimeout>
|
42
|
+
<commitLockTimeout>10000</commitLockTimeout>
|
43
|
+
</indexDefaults>
|
44
|
+
<!-- options specific to the main on-disk lucene index -->
|
45
|
+
<!--
|
46
|
+
<mainIndex>
|
47
|
+
<useCompoundFile>false</useCompoundFile>
|
48
|
+
<mergeFactor>10</mergeFactor>
|
49
|
+
<maxBufferedDocs>1000</maxBufferedDocs>
|
50
|
+
<maxMergeDocs>2147483647</maxMergeDocs>
|
51
|
+
<maxFieldLength>10000</maxFieldLength>
|
52
|
+
|
53
|
+
<unlockOnStartup>false</unlockOnStartup>
|
54
|
+
</mainIndex>
|
55
|
+
-->
|
56
|
+
<!-- If true, unlock any held write or commit locks on startup.
|
57
|
+
This defeats the locking mechanism that allows multiple
|
58
|
+
processes to safely access a lucene index, and should be
|
59
|
+
used with care. -->
|
60
|
+
|
61
|
+
<!-- the default high-performance update handler -->
|
62
|
+
|
63
|
+
<updateHandler class="solr.DirectUpdateHandler2">
|
64
|
+
|
65
|
+
<!-- A prefix of "solr." for class names is an alias that
|
66
|
+
causes solr to search appropriate packages, including
|
67
|
+
org.apache.solr.(search|update|request|core|analysis)
|
68
|
+
-->
|
69
|
+
|
70
|
+
<!-- autocommit pending docs if certain criteria are met
|
71
|
+
<autoCommit>
|
72
|
+
<maxDocs>10000</maxDocs>
|
73
|
+
<maxTime>1000</maxTime>
|
74
|
+
</autoCommit>
|
75
|
+
-->
|
76
|
+
|
77
|
+
<!-- The RunExecutableListener executes an external command.
|
78
|
+
exe - the name of the executable to run
|
79
|
+
dir - dir to use as the current working directory. default="."
|
80
|
+
wait - the calling thread waits until the executable returns. default="true"
|
81
|
+
args - the arguments to pass to the program. default=nothing
|
82
|
+
env - environment variables to set. default=nothing
|
83
|
+
-->
|
84
|
+
<!-- A postCommit event is fired after every commit or optimize command
|
85
|
+
<listener event="postCommit" class="solr.RunExecutableListener">
|
86
|
+
<str name="exe">snapshooter</str>
|
87
|
+
<str name="dir">solr/bin</str>
|
88
|
+
<bool name="wait">true</bool>
|
89
|
+
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
|
90
|
+
<arr name="env"> <str>MYVAR=val1</str> </arr>
|
91
|
+
</listener>
|
92
|
+
-->
|
93
|
+
<!-- A postOptimize event is fired only after every optimize command, useful
|
94
|
+
in conjunction with index distribution to only distribute optimized indices
|
95
|
+
<listener event="postOptimize" class="solr.RunExecutableListener">
|
96
|
+
<str name="exe">snapshooter</str>
|
97
|
+
<str name="dir">solr/bin</str>
|
98
|
+
<bool name="wait">true</bool>
|
99
|
+
</listener>
|
100
|
+
-->
|
101
|
+
|
102
|
+
</updateHandler>
|
103
|
+
|
104
|
+
|
105
|
+
<query>
|
106
|
+
<!-- Maximum number of clauses in a boolean query... can affect
|
107
|
+
range or prefix queries that expand to big boolean
|
108
|
+
queries. An exception is thrown if exceeded. -->
|
109
|
+
<maxBooleanClauses>1024</maxBooleanClauses>
|
110
|
+
|
111
|
+
|
112
|
+
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
|
113
|
+
unordered sets of *all* documents that match a query.
|
114
|
+
When a new searcher is opened, its caches may be prepopulated
|
115
|
+
or "autowarmed" using data from caches in the old searcher.
|
116
|
+
autowarmCount is the number of items to prepopulate. For LRUCache,
|
117
|
+
the autowarmed items will be the most recently accessed items.
|
118
|
+
Parameters:
|
119
|
+
class - the SolrCache implementation (currently only LRUCache)
|
120
|
+
size - the maximum number of entries in the cache
|
121
|
+
initialSize - the initial capacity (number of entries) of
|
122
|
+
the cache. (see java.util.HashMap)
|
123
|
+
autowarmCount - the number of entries to prepopulate from
|
124
|
+
an old cache.
|
125
|
+
-->
|
126
|
+
<filterCache
|
127
|
+
class="solr.LRUCache"
|
128
|
+
size="512"
|
129
|
+
initialSize="512"
|
130
|
+
autowarmCount="256"/>
|
131
|
+
|
132
|
+
<!-- queryResultCache caches results of searches - ordered lists of
|
133
|
+
document ids (DocList) based on a query, a sort, and the range
|
134
|
+
of documents requested. -->
|
135
|
+
<queryResultCache
|
136
|
+
class="solr.LRUCache"
|
137
|
+
size="512"
|
138
|
+
initialSize="512"
|
139
|
+
autowarmCount="256"/>
|
140
|
+
|
141
|
+
<!-- documentCache caches Lucene Document objects (the stored fields for each document).
|
142
|
+
Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
|
143
|
+
<documentCache
|
144
|
+
class="solr.LRUCache"
|
145
|
+
size="512"
|
146
|
+
initialSize="512"
|
147
|
+
autowarmCount="0"/>
|
148
|
+
|
149
|
+
<!-- If true, stored fields that are not requested will be loaded lazily.
|
150
|
+
|
151
|
+
This can result in a significant speed improvement if the usual case is to
|
152
|
+
not load all stored fields, especially if the skipped fields are large compressed
|
153
|
+
text fields.
|
154
|
+
-->
|
155
|
+
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
156
|
+
|
157
|
+
<!-- Example of a generic cache. These caches may be accessed by name
|
158
|
+
through SolrIndexSearcher.getCache(), cacheLookup(), and cacheInsert().
|
159
|
+
The purpose is to enable easy caching of user/application level data.
|
160
|
+
The regenerator argument should be specified as an implementation
|
161
|
+
of solr.search.CacheRegenerator if autowarming is desired. -->
|
162
|
+
<!--
|
163
|
+
<cache name="myUserCache"
|
164
|
+
class="solr.LRUCache"
|
165
|
+
size="4096"
|
166
|
+
initialSize="1024"
|
167
|
+
autowarmCount="1024"
|
168
|
+
regenerator="org.mycompany.mypackage.MyRegenerator"
|
169
|
+
/>
|
170
|
+
-->
|
171
|
+
|
172
|
+
<!-- An optimization that attempts to use a filter to satisfy a search.
|
173
|
+
If the requested sort does not include score, then the filterCache
|
174
|
+
will be checked for a filter matching the query. If found, the filter
|
175
|
+
will be used as the source of document ids, and then the sort will be
|
176
|
+
applied to that.
|
177
|
+
<useFilterForSortedQuery>true</useFilterForSortedQuery>
|
178
|
+
-->
|
179
|
+
|
180
|
+
<!-- An optimization for use with the queryResultCache. When a search
|
181
|
+
is requested, a superset of the requested number of document ids
|
182
|
+
are collected. For example, if a search for a particular query
|
183
|
+
requests matching documents 10 through 19, and queryResultWindowSize is 50,
|
184
|
+
then documents 0 through 49 will be collected and cached. Any further
|
185
|
+
requests in that range can be satisfied via the cache. -->
|
186
|
+
<queryResultWindowSize>10</queryResultWindowSize>
|
187
|
+
|
188
|
+
<!-- This entry enables an int hash representation for filters (DocSets)
|
189
|
+
when the number of items in the set is less than maxSize. For smaller
|
190
|
+
sets, this representation is more memory efficient, more efficient to
|
191
|
+
iterate over, and faster to take intersections. -->
|
192
|
+
<HashDocSet maxSize="3000" loadFactor="0.75"/>
|
193
|
+
|
194
|
+
|
195
|
+
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
|
196
|
+
into cached filters if the number of docs selected by the clause exceeds
|
197
|
+
the threshold (represented as a fraction of the total index) -->
|
198
|
+
<boolTofilterOptimizer enabled="true" cacheSize="32" threshold=".05"/>
|
199
|
+
|
200
|
+
|
201
|
+
<!-- a newSearcher event is fired whenever a new searcher is being prepared
|
202
|
+
and there is a current searcher handling requests (aka registered). -->
|
203
|
+
<!-- QuerySenderListener takes an array of NamedList and executes a
|
204
|
+
local query request for each NamedList in sequence. -->
|
205
|
+
<!--
|
206
|
+
<listener event="newSearcher" class="solr.QuerySenderListener">
|
207
|
+
<arr name="queries">
|
208
|
+
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
|
209
|
+
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
|
210
|
+
</arr>
|
211
|
+
</listener>
|
212
|
+
-->
|
213
|
+
|
214
|
+
<!-- a firstSearcher event is fired whenever a new searcher is being
|
215
|
+
prepared but there is no current registered searcher to handle
|
216
|
+
requests or to gain autowarming data from. -->
|
217
|
+
<!--
|
218
|
+
<listener event="firstSearcher" class="solr.QuerySenderListener">
|
219
|
+
<arr name="queries">
|
220
|
+
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
|
221
|
+
</arr>
|
222
|
+
</listener>
|
223
|
+
-->
|
224
|
+
|
225
|
+
<!-- If a search request comes in and there is no current registered searcher,
|
226
|
+
then immediately register the still warming searcher and use it. If
|
227
|
+
"false" then all requests will block until the first searcher is done
|
228
|
+
warming. -->
|
229
|
+
<useColdSearcher>false</useColdSearcher>
|
230
|
+
|
231
|
+
<!-- Maximum number of searchers that may be warming in the background
|
232
|
+
concurrently. An error is returned if this limit is exceeded. Recommend
|
233
|
+
1-2 for read-only slaves, higher for masters w/o cache warming. -->
|
234
|
+
<maxWarmingSearchers>4</maxWarmingSearchers>
|
235
|
+
|
236
|
+
</query>
|
237
|
+
|
238
|
+
<!--
|
239
|
+
Let the dispatch filter handle /select?qt=XXX
|
240
|
+
handleSelect=true will use consistent error handling for /select and /update
|
241
|
+
handleSelect=false will use solr1.1 style error formatting
|
242
|
+
-->
|
243
|
+
<requestDispatcher handleSelect="true" >
|
244
|
+
<!--Make sure your system has some authentication before enabling remote streaming! -->
|
245
|
+
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
|
246
|
+
</requestDispatcher>
|
247
|
+
|
248
|
+
|
249
|
+
<!-- requestHandler plugins... incoming queries will be dispatched to the
|
250
|
+
correct handler based on the qt (query type) param matching the
|
251
|
+
name of registered handlers.
|
252
|
+
The "standard" request handler is the default and will be used if qt
|
253
|
+
is not specified in the request.
|
254
|
+
-->
|
255
|
+
<requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
|
256
|
+
<!-- default values for query parameters -->
|
257
|
+
<lst name="defaults">
|
258
|
+
<str name="echoParams">explicit</str>
|
259
|
+
<str name="json.nl">map</str>
|
260
|
+
<!--
|
261
|
+
<int name="rows">10</int>
|
262
|
+
<str name="fl">*</str>
|
263
|
+
<str name="version">2.1</str>
|
264
|
+
-->
|
265
|
+
</lst>
|
266
|
+
</requestHandler>
|
267
|
+
|
268
|
+
<!-- Note how you can register the same handler multiple times with
|
269
|
+
different names (and different init parameters)
|
270
|
+
-->
|
271
|
+
<!-- Update request handler.
|
272
|
+
|
273
|
+
Note: Since solr1.1, requestHandlers require a valid content type header if posted in
|
274
|
+
the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
|
275
|
+
The response format differs from solr1.1 formatting and returns a standard error code.
|
276
|
+
|
277
|
+
To enable solr1.1 behavior, remove the /update handler or change its path
|
278
|
+
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
|
279
|
+
-->
|
280
|
+
|
281
|
+
<!-- queryResponseWriter plugins... query responses will be written using the
|
282
|
+
writer specified by the 'wt' request parameter matching the name of a registered
|
283
|
+
writer.
|
284
|
+
The "standard" writer is the default and will be used if 'wt' is not specified
|
285
|
+
in the request. XMLResponseWriter will be used if nothing is specified here.
|
286
|
+
The json, python, and ruby writers are also available by default.
|
287
|
+
|
288
|
+
<queryResponseWriter name="standard" class="org.apache.solr.request.XMLResponseWriter"/>
|
289
|
+
<queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/>
|
290
|
+
<queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/>
|
291
|
+
<queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/>
|
292
|
+
|
293
|
+
<queryResponseWriter name="custom" class="com.example.MyResponseWriter"/>
|
294
|
+
-->
|
295
|
+
|
296
|
+
<!-- XSLT response writer transforms the XML output by any xslt file found
|
297
|
+
in Solr's conf/xslt directory. Changes to xslt files are checked for
|
298
|
+
every xsltCacheLifetimeSeconds.
|
299
|
+
-->
|
300
|
+
<queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter">
|
301
|
+
<int name="xsltCacheLifetimeSeconds">5</int>
|
302
|
+
</queryResponseWriter>
|
303
|
+
|
304
|
+
</config>
|