ruby-fedora 0.1.2 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING.LESSER.txt +165 -0
- data/COPYING.txt +674 -0
- data/Manifest.txt +19 -20
- data/README.txt +6 -1
- data/Rakefile +4 -0
- data/config/hoe.rb +2 -2
- data/config/requirements.rb +9 -2
- data/lib/active_fedora.rb +41 -0
- data/lib/active_fedora/base.rb +278 -8
- data/lib/active_fedora/content_model.rb +22 -0
- data/lib/active_fedora/datastream.rb +95 -0
- data/lib/active_fedora/fedora_object.rb +84 -0
- data/lib/active_fedora/metadata_datastream.rb +97 -0
- data/lib/active_fedora/model.rb +94 -0
- data/lib/active_fedora/property.rb +15 -0
- data/lib/active_fedora/qualified_dublin_core_datastream.rb +72 -0
- data/lib/active_fedora/relationship.rb +43 -0
- data/lib/active_fedora/rels_ext_datastream.rb +43 -0
- data/lib/active_fedora/semantic_node.rb +221 -0
- data/lib/active_fedora/solr_service.rb +20 -0
- data/lib/fedora/base.rb +2 -1
- data/lib/fedora/connection.rb +104 -134
- data/lib/fedora/datastream.rb +10 -1
- data/lib/fedora/fedora_object.rb +28 -24
- data/lib/fedora/generic_search.rb +71 -0
- data/lib/fedora/repository.rb +47 -3
- data/lib/ruby-fedora.rb +9 -8
- data/lib/util/class_level_inheritable_attributes.rb +23 -0
- data/solr/config/schema.xml +229 -0
- metadata +37 -24
- data/lib/active-fedora.rb +0 -1
- data/lib/ambition/adapters/active_fedora.rb +0 -10
- data/lib/ambition/adapters/active_fedora/base.rb +0 -14
- data/lib/ambition/adapters/active_fedora/query.rb +0 -48
- data/lib/ambition/adapters/active_fedora/select.rb +0 -104
- data/lib/ambition/adapters/active_fedora/slice.rb +0 -19
- data/lib/ambition/adapters/active_fedora/sort.rb +0 -43
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/script/txt2html +0 -74
- data/tasks/deployment.rake +0 -34
- data/tasks/environment.rake +0 -7
- data/tasks/website.rake +0 -17
- data/website/index.html +0 -93
- data/website/index.txt +0 -39
- data/website/javascripts/rounded_corners_lite.inc.js +0 -285
- data/website/stylesheets/screen.css +0 -138
- data/website/template.rhtml +0 -48
data/lib/fedora/datastream.rb
CHANGED
@@ -12,11 +12,20 @@ class Fedora::Datastream < Fedora::BaseObject
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def dsid
|
15
|
-
attributes
|
15
|
+
if attributes.has_key?(:dsid)
|
16
|
+
attributes[:dsid]
|
17
|
+
else
|
18
|
+
attributes[:dsID]
|
19
|
+
end
|
16
20
|
end
|
17
21
|
|
18
22
|
# See http://www.fedora.info/definitions/identifiers/
|
19
23
|
def uri
|
20
24
|
"fedora:info/#{pid}/datastreams/#{dsid}"
|
21
25
|
end
|
26
|
+
|
27
|
+
# @return [String] the URL of the datastream in Fedora, without the repository userinfo
|
28
|
+
def url
|
29
|
+
return "#{Fedora::Repository.instance.base_url}/objects/#{pid}/datastreams/#{dsid}"
|
30
|
+
end
|
22
31
|
end
|
data/lib/fedora/fedora_object.rb
CHANGED
@@ -33,19 +33,25 @@ class Fedora::FedoraObject < Fedora::BaseObject
|
|
33
33
|
# TODO: Mix In DC attribute finders/accessors
|
34
34
|
# TODO: Make sure that fedora_object_type and contentModel are juggled properly.
|
35
35
|
def retrieve_attr_from_fedora
|
36
|
-
self.attributes.merge!(
|
36
|
+
self.attributes.merge!(profile)
|
37
|
+
object_rexml = REXML::Document.new(object_xml)
|
37
38
|
self.attributes.merge!({
|
38
|
-
:state =>
|
39
|
+
:state => object_rexml.root.elements["objectProperties/property[@NAME='info:fedora/fedora-system:def/model#state']"].attributes["value"],
|
40
|
+
:create_date => object_rexml.root.elements["objectProperties/property[@NAME='info:fedora/fedora-system:def/model#createdDate']"].attributes["value"],
|
41
|
+
:modified_date => object_rexml.root.elements["objectProperties/property[@NAME='info:fedora/fedora-system:def/model#lastModifiedDate']"].attributes["value"],
|
42
|
+
:label => object_rexml.root.elements["objectProperties/property[@NAME='info:fedora/fedora-system:def/model#label']"].attributes["value"]
|
39
43
|
})
|
40
44
|
|
41
45
|
end
|
42
46
|
|
43
47
|
def create_date
|
44
|
-
|
48
|
+
profile[:create_date]
|
49
|
+
#self.attributes[:cDate]
|
45
50
|
end
|
46
51
|
|
47
52
|
def modified_date
|
48
|
-
|
53
|
+
profile[:create_date]
|
54
|
+
#self.attributes[:mDate]
|
49
55
|
end
|
50
56
|
|
51
57
|
|
@@ -77,14 +83,6 @@ class Fedora::FedoraObject < Fedora::BaseObject
|
|
77
83
|
self.attributes[:label] = new_label
|
78
84
|
end
|
79
85
|
|
80
|
-
def content_model
|
81
|
-
self.attributes[:contentModel]
|
82
|
-
end
|
83
|
-
|
84
|
-
def content_model=(new_content_model)
|
85
|
-
self.attributes[:contentModel] = new_content_model
|
86
|
-
end
|
87
|
-
|
88
86
|
# Get the object and read its @ownerId from the profile
|
89
87
|
def owner_id
|
90
88
|
self.attributes[:ownerID]
|
@@ -96,24 +94,30 @@ class Fedora::FedoraObject < Fedora::BaseObject
|
|
96
94
|
|
97
95
|
def profile
|
98
96
|
# Use xmlsimple to slurp the attributes
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
:date_created => objectProfile[objCreateDate],
|
106
|
-
:date_modified => objectProfile[objLastModDate]
|
107
|
-
}
|
97
|
+
retrieved_profile = XmlSimple.xml_in(Fedora::Repository.instance.fetch_custom(self.pid, :profile))
|
98
|
+
profile_hash = Hash[:owner_id => retrieved_profile["objOwnerId"],
|
99
|
+
:label => retrieved_profile["objLabel"],
|
100
|
+
:create_date => retrieved_profile["objCreateDate"],
|
101
|
+
:modified_date => retrieved_profile["objLastModDate"]
|
102
|
+
]
|
108
103
|
end
|
109
104
|
|
110
|
-
def
|
111
|
-
|
112
|
-
|
105
|
+
def object_xml
|
106
|
+
Fedora::Repository.instance.fetch_custom(pid, :objectXML)
|
107
|
+
end
|
108
|
+
|
109
|
+
def self.object_xml(pid=pid)
|
110
|
+
Fedora::Repository.instance.fetch_custom(pid, :objectXML)
|
113
111
|
end
|
114
112
|
|
115
113
|
# See http://www.fedora.info/definitions/identifiers
|
116
114
|
def uri
|
117
115
|
"fedora:info/#{pid}"
|
118
116
|
end
|
117
|
+
|
118
|
+
# @return [String] the URL of the object in Fedora, without the repository userinfo
|
119
|
+
def url
|
120
|
+
repo_url = Fedora::Repository.instance.fedora_url
|
121
|
+
return "#{repo_url.scheme}://#{repo_url.host}:#{repo_url.port}#{repo_url.path}/objects/#{pid}"
|
122
|
+
end
|
119
123
|
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#
|
2
|
+
# @Creator Matt Zumwalt, MediaShelf LLC
|
3
|
+
# @Copyright Matt Zumwalt, 2007. All Rights Reserved.
|
4
|
+
#
|
5
|
+
|
6
|
+
module Fedora
|
7
|
+
class GenericSearch
|
8
|
+
|
9
|
+
def initialize(uri, service_name)
|
10
|
+
@uri = "#{uri}/#{service_name}"
|
11
|
+
@client = HTTPClient.new(@uri)
|
12
|
+
@extheader = {'User-Agent'=>"RubyFedora"}
|
13
|
+
end
|
14
|
+
|
15
|
+
def call_resource
|
16
|
+
|
17
|
+
end
|
18
|
+
|
19
|
+
# TODO: Handle ruby-style params, camel-casing them before passing to call_resource...
|
20
|
+
def update_index(params)
|
21
|
+
query = {:action=>action,:value=>value,:repositoryName=>repository_name,:indexName=>indexname,:restXslt=>"copyXML"}.merge(params)
|
22
|
+
query.merge {:operation> "updateIndex"}
|
23
|
+
|
24
|
+
@client.get(@uri, query, @extheader)
|
25
|
+
return "update_index Not Implemented."
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
def browse_index(params)
|
30
|
+
# Sample: /fedoragsearch/rest?operation=browseIndex&startTerm=&fieldName=PID&termPageSize=20&indexName=FedoraIndex&restXslt=copyXml&resultPageXslt=browseIndexToResultPage
|
31
|
+
query = {:startTerm=>URLEncoder.encode(start_term, "UTF-8"),:fieldName=>"",:indexName=>"",:termPageSize=>"", :restXslt=>"copyXml", :resultPageXslt=>""}.merge(params)
|
32
|
+
query.merge {:operation> "browseIndex"}
|
33
|
+
@client.get(@uri, query, @extheader)
|
34
|
+
return "browse_index Not Implemented."
|
35
|
+
end
|
36
|
+
|
37
|
+
def gfind_objects(params)
|
38
|
+
# Sample: /fedoragsearch/rest?operation=gfindObjects&query=test&hitPageSize=10&restXslt=copyXml
|
39
|
+
# fieldMaxLength limits the number of characters returned from the value of each object field.
|
40
|
+
# Snippets will highlight matched words within the search results. To keep the xml as simple as possible, set snippetsMax to 0.
|
41
|
+
query = {:query=>URLEncoder.encode(query, "UTF-8"),:value=>value,:indexName=>indexname,:hitPageStart=>"",:hitPageSize=>"",:snippetsMax=>"0",:fieldMaxLength=>"",:restXslt=>"copyXML",:resultPageXslt=>""}.merge(params)
|
42
|
+
query.merge {:operation> "gfindObjects"}
|
43
|
+
@client.get(@uri, query, @extheader)
|
44
|
+
return "gfind_objects Not Implemented."
|
45
|
+
end
|
46
|
+
|
47
|
+
def get_index_info(params)
|
48
|
+
# Sample: /fedoragsearch/rest?operation=getIndexInfo&restXslt=copyXml
|
49
|
+
query = {:indexName=>"",:restXslt=>"copyXml", :resultPageXslt=>""}.merge(params)
|
50
|
+
query.merge {:operation> "getIndexInfo"}
|
51
|
+
@client.get(@uri, query, @extheader)
|
52
|
+
return "get_index_info Not Implemented."
|
53
|
+
end
|
54
|
+
|
55
|
+
def get_repository_info(params)
|
56
|
+
# Sample: /fedoragsearch/rest?operation=getRepositoryInfo&restXslt=copyXml
|
57
|
+
query = {:repositoryName=>"",:restXslt=>"copyXml", :resultPageXslt=>""}.merge(params)
|
58
|
+
query.merge {:operation> "getRepositoryInfo"}
|
59
|
+
@client.get(@uri, query, @extheader)
|
60
|
+
return "get_repository_info Not Implemented."
|
61
|
+
end
|
62
|
+
|
63
|
+
def configure(params)
|
64
|
+
query = {:configName=>""}.merge(params)
|
65
|
+
query.merge {:operation> "configure"}
|
66
|
+
@client.get(@uri, query, @extheader)
|
67
|
+
return "configure Not Implemented."
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
data/lib/fedora/repository.rb
CHANGED
@@ -15,6 +15,30 @@ module Fedora
|
|
15
15
|
]
|
16
16
|
|
17
17
|
class Repository
|
18
|
+
|
19
|
+
attr_accessor :repository_name, :base_url, :fedora_version, :pid_namespace, :pid_delimiter
|
20
|
+
|
21
|
+
def self.flush
|
22
|
+
Thread.current[:repo]=nil
|
23
|
+
end
|
24
|
+
def self.register(url, surrogate=nil)
|
25
|
+
Thread.current[:repo]= Fedora::Repository.new(url, surrogate)
|
26
|
+
begin
|
27
|
+
repo = Thread.current[:repo]
|
28
|
+
attributes = repo.describe_repository
|
29
|
+
repo.repository_name = attributes["repositoryName"].first
|
30
|
+
repo.base_url = attributes["repositoryBaseURL"].first
|
31
|
+
repo.fedora_version = attributes["repositoryVersion"].first
|
32
|
+
repo.pid_namespace = attributes["repositoryPID"].first["PID-namespaceIdentifier"].first
|
33
|
+
repo.pid_delimiter = attributes["repositoryPID"].first["PID-delimiter"].first
|
34
|
+
rescue
|
35
|
+
end
|
36
|
+
Thread.current[:repo]
|
37
|
+
end
|
38
|
+
def self.instance
|
39
|
+
raise "did you register a repo?" unless Thread.current[:repo]
|
40
|
+
Thread.current[:repo]
|
41
|
+
end
|
18
42
|
class StringResponse < String
|
19
43
|
attr_reader :content_type
|
20
44
|
|
@@ -26,8 +50,9 @@ module Fedora
|
|
26
50
|
|
27
51
|
attr_accessor :fedora_url
|
28
52
|
|
29
|
-
def initialize(fedora_url =
|
53
|
+
def initialize(fedora_url, surrogate=nil)
|
30
54
|
@fedora_url = fedora_url.is_a?(URI) ? fedora_url : URI.parse(fedora_url)
|
55
|
+
@surrogate = surrogate
|
31
56
|
@connection = nil
|
32
57
|
end
|
33
58
|
|
@@ -37,6 +62,8 @@ module Fedora
|
|
37
62
|
StringResponse.new(response.body, response.content_type)
|
38
63
|
end
|
39
64
|
|
65
|
+
|
66
|
+
|
40
67
|
# Find fedora objects with http://www.fedora.info/wiki/index.php/API-A-Lite_findObjects
|
41
68
|
#
|
42
69
|
# == Parameters
|
@@ -69,6 +96,11 @@ module Fedora
|
|
69
96
|
|
70
97
|
convert_xml(connection.get("#{fedora_url.path}/objects?#{params.to_query}#{includes}"))
|
71
98
|
end
|
99
|
+
def find_model(pid, klazz)
|
100
|
+
obj = self.find_objects("pid=#{pid}").first
|
101
|
+
doc = REXML::Document.new(obj.object_xml, :ignore_whitespace_nodes=>:all)
|
102
|
+
klazz.deserialize(doc)
|
103
|
+
end
|
72
104
|
|
73
105
|
# Create the given object if it's new (not obtained from a find method). Otherwise update the object.
|
74
106
|
#
|
@@ -78,6 +110,12 @@ module Fedora
|
|
78
110
|
def save(object)
|
79
111
|
object.new_object? ? create(object) : update(object)
|
80
112
|
end
|
113
|
+
|
114
|
+
def nextid
|
115
|
+
d = REXML::Document.new(connection.post(fedora_url.path+"/management/getNextPID?xml=true").body)
|
116
|
+
d.elements['//pid'].text
|
117
|
+
end
|
118
|
+
|
81
119
|
|
82
120
|
def create(object)
|
83
121
|
case object
|
@@ -139,7 +177,7 @@ module Fedora
|
|
139
177
|
def delete(object)
|
140
178
|
raise ArgumentError, "Object must not be nil" if object.nil?
|
141
179
|
response = connection.delete("#{url_for(object)}")
|
142
|
-
response.code == '200'
|
180
|
+
response.code == '200' or response.code == '204' # Temporary hack around error in Fedora 3.0 Final's REST API
|
143
181
|
end
|
144
182
|
|
145
183
|
# Fetch the given object using custom method. This is used to fetch other aspects of a fedora object,
|
@@ -162,6 +200,11 @@ module Fedora
|
|
162
200
|
connection.raw_get("#{url_for(object)}#{path}?#{extra_params.to_query}").body
|
163
201
|
end
|
164
202
|
|
203
|
+
def describe_repository
|
204
|
+
result_body = connection.raw_get("#{fedora_url.path}/describe?xml=true").body
|
205
|
+
XmlSimple.xml_in(result_body)
|
206
|
+
end
|
207
|
+
|
165
208
|
private
|
166
209
|
def convert_xml(response)
|
167
210
|
results = FedoraObjects.new
|
@@ -175,6 +218,7 @@ module Fedora
|
|
175
218
|
when Hash
|
176
219
|
results << FedoraObject.new(objectFields.symbolize_keys!)
|
177
220
|
end
|
221
|
+
results.each {|result| result.new_object = false}
|
178
222
|
results
|
179
223
|
end
|
180
224
|
|
@@ -189,7 +233,7 @@ module Fedora
|
|
189
233
|
# or not (defaults to +false+).
|
190
234
|
def connection(refresh = false)
|
191
235
|
if refresh || @connection.nil?
|
192
|
-
@connection =
|
236
|
+
@connection = Fedora::Connection.new(@fedora_url, Fedora::XmlFormat, @surrogate)
|
193
237
|
end
|
194
238
|
@connection
|
195
239
|
end
|
data/lib/ruby-fedora.rb
CHANGED
@@ -1,17 +1,18 @@
|
|
1
1
|
$LOAD_PATH.unshift File.dirname(__FILE__) unless
|
2
|
-
|
3
|
-
$LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
$LOAD_PATH.include?(File.dirname(__FILE__)) || $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
|
4
3
|
|
5
|
-
module
|
4
|
+
module Fedora #:nodoc:
|
6
5
|
module VERSION #:nodoc:
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
unless defined? MAJOR
|
7
|
+
MAJOR = 0
|
8
|
+
MINOR = 9
|
9
|
+
TINY = 0
|
10
10
|
|
11
|
-
|
11
|
+
STRING = [MAJOR, MINOR, TINY].join('.')
|
12
|
+
end
|
12
13
|
end
|
13
14
|
end
|
14
15
|
|
15
16
|
module Fedora; end
|
16
17
|
|
17
|
-
require 'fedora/repository'
|
18
|
+
require 'fedora/repository'
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module MediaShelfClassLevelInheritableAttributes
|
2
|
+
def self.included(base)
|
3
|
+
base.extend(MSClassMethods)
|
4
|
+
end
|
5
|
+
module MSClassMethods
|
6
|
+
def ms_inheritable_attributes(*args)
|
7
|
+
@ms_inheritable_attributes ||=[:ms_inheritable_attributes]
|
8
|
+
@ms_inheritable_attributes+=args
|
9
|
+
args.each do |arg|
|
10
|
+
class_eval %(
|
11
|
+
class <<self;attr_accessor :#{arg} end
|
12
|
+
)
|
13
|
+
end
|
14
|
+
@ms_inheritable_attributes
|
15
|
+
end
|
16
|
+
def inherited(subclass)
|
17
|
+
@ms_inheritable_attributes.each do |attrib|
|
18
|
+
instance_var = "@#{attrib}"
|
19
|
+
subclass.instance_variable_set(instance_var, instance_variable_get(instance_var))
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,229 @@
|
|
1
|
+
<?xml version="1.0" ?>
|
2
|
+
<!--
|
3
|
+
Licensed to the Apache Software Foundation (ASF) under one or more
|
4
|
+
contributor license agreements. See the NOTICE file distributed with
|
5
|
+
this work for additional information regarding copyright ownership.
|
6
|
+
The ASF licenses this file to You under the Apache License, Version 2.0
|
7
|
+
(the "License"); you may not use this file except in compliance with
|
8
|
+
the License. You may obtain a copy of the License at
|
9
|
+
|
10
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
|
12
|
+
Unless required by applicable law or agreed to in writing, software
|
13
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
See the License for the specific language governing permissions and
|
16
|
+
limitations under the License.
|
17
|
+
-->
|
18
|
+
|
19
|
+
<!-- This is the Solr schema file. This file should be named "schema.xml" and
|
20
|
+
should be in the conf directory under the solr home
|
21
|
+
(i.e. ./solr/conf/schema.xml by default)
|
22
|
+
or located where the classloader for the Solr webapp can find it.
|
23
|
+
|
24
|
+
For more information, on how to customize this file, please see
|
25
|
+
http://wiki.apache.org/solr/SchemaXml
|
26
|
+
-->
|
27
|
+
|
28
|
+
<schema name="active_fedora" version="0.1">
|
29
|
+
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
30
|
+
Applications should change this to reflect the nature of the search collection.
|
31
|
+
version="1.1" is Solr's version number for the schema syntax and semantics. It should
|
32
|
+
not normally be changed by applications.
|
33
|
+
1.0: multiValued attribute did not exist, all fields are multiValued by nature
|
34
|
+
1.1: multiValued attribute introduced, false by default -->
|
35
|
+
|
36
|
+
<types>
|
37
|
+
<!-- field type definitions. The "name" attribute is
|
38
|
+
just a label to be used by field definitions. The "class"
|
39
|
+
attribute and any other attributes determine the real
|
40
|
+
behavior of the fieldtype.
|
41
|
+
Class names starting with "solr" refer to java classes in the
|
42
|
+
org.apache.solr.analysis package.
|
43
|
+
-->
|
44
|
+
|
45
|
+
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
|
46
|
+
- StrField and TextField support an optional compressThreshold which
|
47
|
+
limits compression (if enabled in the derived fields) to values which
|
48
|
+
exceed a certain size (in characters).
|
49
|
+
-->
|
50
|
+
<fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
51
|
+
|
52
|
+
<!-- boolean type: "true" or "false" -->
|
53
|
+
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
54
|
+
|
55
|
+
<!-- The optional sortMissingLast and sortMissingFirst attributes are
|
56
|
+
currently supported on types that are sorted internally as strings.
|
57
|
+
- If sortMissingLast="true", then a sort on this field will cause documents
|
58
|
+
without the field to come after documents with the field,
|
59
|
+
regardless of the requested sort order (asc or desc).
|
60
|
+
- If sortMissingFirst="true", then a sort on this field will cause documents
|
61
|
+
without the field to come before documents with the field,
|
62
|
+
regardless of the requested sort order.
|
63
|
+
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
64
|
+
then default lucene sorting will be used which places docs without the
|
65
|
+
field first in an ascending sort and last in a descending sort.
|
66
|
+
-->
|
67
|
+
|
68
|
+
|
69
|
+
<!-- numeric field types that store and index the text
|
70
|
+
value verbatim (and hence don't support range queries, since the
|
71
|
+
lexicographic ordering isn't equal to the numeric ordering) -->
|
72
|
+
<fieldtype name="integer" class="solr.IntField" omitNorms="true"/>
|
73
|
+
<fieldtype name="long" class="solr.LongField" omitNorms="true"/>
|
74
|
+
<fieldtype name="float" class="solr.FloatField" omitNorms="true"/>
|
75
|
+
<fieldtype name="double" class="solr.DoubleField" omitNorms="true"/>
|
76
|
+
|
77
|
+
|
78
|
+
<!-- Numeric field types that manipulate the value into
|
79
|
+
a string value that isn't human-readable in its internal form,
|
80
|
+
but with a lexicographic ordering the same as the numeric ordering,
|
81
|
+
so that range queries work correctly. -->
|
82
|
+
<fieldtype name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
|
83
|
+
<fieldtype name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
|
84
|
+
<fieldtype name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
|
85
|
+
<fieldtype name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
|
86
|
+
|
87
|
+
|
88
|
+
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
89
|
+
is a more restricted form of the canonical representation of dateTime
|
90
|
+
http://www.w3.org/TR/xmlschema-2/#dateTime
|
91
|
+
The trailing "Z" designates UTC time and is mandatory.
|
92
|
+
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
93
|
+
All other components are mandatory.
|
94
|
+
|
95
|
+
Expressions can also be used to denote calculations that should be
|
96
|
+
performed relative to "NOW" to determine the value, ie...
|
97
|
+
|
98
|
+
NOW/HOUR
|
99
|
+
... Round to the start of the current hour
|
100
|
+
NOW-1DAY
|
101
|
+
... Exactly 1 day prior to now
|
102
|
+
NOW/DAY+6MONTHS+3DAYS
|
103
|
+
... 6 months and 3 days in the future from the start of
|
104
|
+
the current day
|
105
|
+
|
106
|
+
Consult the DateField javadocs for more information.
|
107
|
+
-->
|
108
|
+
<fieldtype name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
|
109
|
+
|
110
|
+
<!-- solr.TextField allows the specification of custom text analyzers
|
111
|
+
specified as a tokenizer and a list of token filters. Different
|
112
|
+
analyzers may be specified for indexing and querying.
|
113
|
+
|
114
|
+
The optional positionIncrementGap puts space between multiple fields of
|
115
|
+
this type on the same document, with the purpose of preventing false phrase
|
116
|
+
matching across fields.
|
117
|
+
|
118
|
+
For more info on customizing your analyzer chain, please see
|
119
|
+
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
|
120
|
+
-->
|
121
|
+
|
122
|
+
<!-- One can also specify an existing Analyzer class that has a
|
123
|
+
default constructor via the class attribute on the analyzer element
|
124
|
+
<fieldtype name="text_greek" class="solr.TextField">
|
125
|
+
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
126
|
+
</fieldType>
|
127
|
+
-->
|
128
|
+
|
129
|
+
<!-- A text field that only splits on whitespace for exact matching of words -->
|
130
|
+
<fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
131
|
+
<analyzer>
|
132
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
133
|
+
</analyzer>
|
134
|
+
</fieldtype>
|
135
|
+
|
136
|
+
<fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
|
137
|
+
<analyzer>
|
138
|
+
<tokenizer class="solr.StandardTokenizerFactory"/>
|
139
|
+
<filter class="solr.StandardFilterFactory"/>
|
140
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
141
|
+
</analyzer>
|
142
|
+
</fieldtype>
|
143
|
+
|
144
|
+
<fieldtype name="text_zh" class="solr.TextField">
|
145
|
+
<analyzer class="org.apache.lucene.analysis.cn.ChineseAnalyzer"/>
|
146
|
+
</fieldtype>
|
147
|
+
|
148
|
+
|
149
|
+
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
150
|
+
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
151
|
+
<fieldtype name="textTight" class="solr.TextField" positionIncrementGap="100" >
|
152
|
+
<analyzer>
|
153
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
154
|
+
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
|
155
|
+
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
156
|
+
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
157
|
+
<filter class="solr.LowerCaseFilterFactory"/>
|
158
|
+
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
|
159
|
+
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
160
|
+
</analyzer>
|
161
|
+
</fieldtype>
|
162
|
+
|
163
|
+
</types>
|
164
|
+
|
165
|
+
|
166
|
+
<fields>
|
167
|
+
<!-- Valid attributes for fields:
|
168
|
+
name: mandatory - the name for the field
|
169
|
+
type: mandatory - the name of a previously defined type from the <types> section
|
170
|
+
indexed: true if this field should be indexed (searchable or sortable)
|
171
|
+
stored: true if this field should be retrievable
|
172
|
+
compressed: [false] if this field should be stored using gzip compression
|
173
|
+
(this will only apply if the field type is compressible; among
|
174
|
+
the standard field types, only TextField and StrField are)
|
175
|
+
multiValued: true if this field may contain multiple values per document
|
176
|
+
omitNorms: (expert) set to true to omit the norms associated with
|
177
|
+
this field (this disables length normalization and index-time
|
178
|
+
boosting for the field, and saves some memory). Only full-text
|
179
|
+
fields or fields that need an index-time boost need norms.
|
180
|
+
-->
|
181
|
+
|
182
|
+
<field name="id" type="string" indexed="true" stored="true"/>
|
183
|
+
|
184
|
+
<!-- catchall field, containing all other searchable text fields (implemented
|
185
|
+
via copyField further on in this schema) -->
|
186
|
+
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
187
|
+
<!-- catchall field, containing all other date fields (implemented
|
188
|
+
via copyField further on in this schema) -->
|
189
|
+
<field name="date" type="date" indexed="true" stored="false" multiValued="true"/>
|
190
|
+
|
191
|
+
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
192
|
+
will be used if the name matches any of the patterns.
|
193
|
+
RESTRICTION: the glob-like pattern in the name attribute must have
|
194
|
+
a "*" only at the start or the end.
|
195
|
+
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
196
|
+
Longer patterns will be matched first. If equal-size patterns
|
197
|
+
both match, the first appearing in the schema will be used. -->
|
198
|
+
<dynamicField name="*_date" type="date" indexed="true" stored="true" multiValued="true"/>
|
199
|
+
<dynamicField name="*_name" type="date" indexed="true" stored="true" multiValued="true"/>
|
200
|
+
<dynamicField name="*_field" type="text" indexed="true" stored="true" multiValued="true"/>
|
201
|
+
|
202
|
+
<dynamicField name="*_facet" type="string" indexed="true" stored="true" multiValued="true"/>
|
203
|
+
<dynamicField name="*_zh_text" type="text_zh" indexed="true" stored="true" multiValued="true"/>
|
204
|
+
<dynamicField name="*_text" type="text" indexed="true" stored="true" multiValued="true"/>
|
205
|
+
<dynamicField name="*_display" type="text" indexed="false" stored="true" multiValued="true"/>
|
206
|
+
</fields>
|
207
|
+
|
208
|
+
<!-- field to use to determine and enforce document uniqueness. -->
|
209
|
+
<uniqueKey>id</uniqueKey>
|
210
|
+
|
211
|
+
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
212
|
+
<defaultSearchField>text</defaultSearchField>
|
213
|
+
|
214
|
+
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
215
|
+
<solrQueryParser defaultOperator="AND"/>
|
216
|
+
|
217
|
+
<!-- copyField commands copy one field to another at the time a document
|
218
|
+
is added to the index. It's used either to index the same field differently,
|
219
|
+
or to add multiple fields to the same field for easier/faster searching. -->
|
220
|
+
<copyField source="*_date" dest="date"/>
|
221
|
+
<copyField source="*_text" dest="text"/>
|
222
|
+
<copyField source="*_facet" dest="text"/>
|
223
|
+
|
224
|
+
<!-- Similarity is the scoring routine for each document vs. a query.
|
225
|
+
A custom similarity may be specified here, but the default is fine
|
226
|
+
for most applications. -->
|
227
|
+
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
|
228
|
+
|
229
|
+
</schema>
|