ruby-fedora 0.1.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/COPYING.LESSER.txt +165 -0
  2. data/COPYING.txt +674 -0
  3. data/Manifest.txt +19 -20
  4. data/README.txt +6 -1
  5. data/Rakefile +4 -0
  6. data/config/hoe.rb +2 -2
  7. data/config/requirements.rb +9 -2
  8. data/lib/active_fedora.rb +41 -0
  9. data/lib/active_fedora/base.rb +278 -8
  10. data/lib/active_fedora/content_model.rb +22 -0
  11. data/lib/active_fedora/datastream.rb +95 -0
  12. data/lib/active_fedora/fedora_object.rb +84 -0
  13. data/lib/active_fedora/metadata_datastream.rb +97 -0
  14. data/lib/active_fedora/model.rb +94 -0
  15. data/lib/active_fedora/property.rb +15 -0
  16. data/lib/active_fedora/qualified_dublin_core_datastream.rb +72 -0
  17. data/lib/active_fedora/relationship.rb +43 -0
  18. data/lib/active_fedora/rels_ext_datastream.rb +43 -0
  19. data/lib/active_fedora/semantic_node.rb +221 -0
  20. data/lib/active_fedora/solr_service.rb +20 -0
  21. data/lib/fedora/base.rb +2 -1
  22. data/lib/fedora/connection.rb +104 -134
  23. data/lib/fedora/datastream.rb +10 -1
  24. data/lib/fedora/fedora_object.rb +28 -24
  25. data/lib/fedora/generic_search.rb +71 -0
  26. data/lib/fedora/repository.rb +47 -3
  27. data/lib/ruby-fedora.rb +9 -8
  28. data/lib/util/class_level_inheritable_attributes.rb +23 -0
  29. data/solr/config/schema.xml +229 -0
  30. metadata +37 -24
  31. data/lib/active-fedora.rb +0 -1
  32. data/lib/ambition/adapters/active_fedora.rb +0 -10
  33. data/lib/ambition/adapters/active_fedora/base.rb +0 -14
  34. data/lib/ambition/adapters/active_fedora/query.rb +0 -48
  35. data/lib/ambition/adapters/active_fedora/select.rb +0 -104
  36. data/lib/ambition/adapters/active_fedora/slice.rb +0 -19
  37. data/lib/ambition/adapters/active_fedora/sort.rb +0 -43
  38. data/script/destroy +0 -14
  39. data/script/generate +0 -14
  40. data/script/txt2html +0 -74
  41. data/tasks/deployment.rake +0 -34
  42. data/tasks/environment.rake +0 -7
  43. data/tasks/website.rake +0 -17
  44. data/website/index.html +0 -93
  45. data/website/index.txt +0 -39
  46. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  47. data/website/stylesheets/screen.css +0 -138
  48. data/website/template.rhtml +0 -48
@@ -12,11 +12,20 @@ class Fedora::Datastream < Fedora::BaseObject
12
12
  end
13
13
 
14
14
  def dsid
15
- attributes[:dsID]
15
+ if attributes.has_key?(:dsid)
16
+ attributes[:dsid]
17
+ else
18
+ attributes[:dsID]
19
+ end
16
20
  end
17
21
 
18
22
  # See http://www.fedora.info/definitions/identifiers/
19
23
  def uri
20
24
  "fedora:info/#{pid}/datastreams/#{dsid}"
21
25
  end
26
+
27
+ # @return [String] the URL of the datastream in Fedora, without the repository userinfo
28
+ def url
29
+ return "#{Fedora::Repository.instance.base_url}/objects/#{pid}/datastreams/#{dsid}"
30
+ end
22
31
  end
@@ -33,19 +33,25 @@ class Fedora::FedoraObject < Fedora::BaseObject
33
33
  # TODO: Mix In DC attribute finders/accessors
34
34
  # TODO: Make sure that fedora_object_type and contentModel are juggled properly.
35
35
  def retrieve_attr_from_fedora
36
- self.attributes.merge!(objectProfile)
36
+ self.attributes.merge!(profile)
37
+ object_rexml = REXML::Document.new(object_xml)
37
38
  self.attributes.merge!({
38
- :state => objectXML.root.elements["objectProperties/property[@NAME='info:fedora/fedora-system:def/model#state']"].attributes["value"]
39
+ :state => object_rexml.root.elements["objectProperties/property[@NAME='info:fedora/fedora-system:def/model#state']"].attributes["value"],
40
+ :create_date => object_rexml.root.elements["objectProperties/property[@NAME='info:fedora/fedora-system:def/model#createdDate']"].attributes["value"],
41
+ :modified_date => object_rexml.root.elements["objectProperties/property[@NAME='info:fedora/fedora-system:def/model#lastModifiedDate']"].attributes["value"],
42
+ :label => object_rexml.root.elements["objectProperties/property[@NAME='info:fedora/fedora-system:def/model#label']"].attributes["value"]
39
43
  })
40
44
 
41
45
  end
42
46
 
43
47
  def create_date
44
- objectProfile[:create_date]
48
+ profile[:create_date]
49
+ #self.attributes[:cDate]
45
50
  end
46
51
 
47
52
  def modified_date
48
- objectProfile[:modified_date]
53
+ profile[:create_date]
54
+ #self.attributes[:mDate]
49
55
  end
50
56
 
51
57
 
@@ -77,14 +83,6 @@ class Fedora::FedoraObject < Fedora::BaseObject
77
83
  self.attributes[:label] = new_label
78
84
  end
79
85
 
80
- def content_model
81
- self.attributes[:contentModel]
82
- end
83
-
84
- def content_model=(new_content_model)
85
- self.attributes[:contentModel] = new_content_model
86
- end
87
-
88
86
  # Get the object and read its @ownerId from the profile
89
87
  def owner_id
90
88
  self.attributes[:ownerID]
@@ -96,24 +94,30 @@ class Fedora::FedoraObject < Fedora::BaseObject
96
94
 
97
95
  def profile
98
96
  # Use xmlsimple to slurp the attributes
99
- objectProfile = XmlSimple.xml_in(@fedora.call_resource(:retrieve, :objects_profile, {:pid => @pid}))
100
- # TODO: Find out if xmlsimple automatically expands camelCased element names...
101
- profile_as_array = {
102
- :owner_id => objectProfile[objOwnerId],
103
- :content_model => objectProfile[objContentModel],
104
- :label => objectProfile[objLabel],
105
- :date_created => objectProfile[objCreateDate],
106
- :date_modified => objectProfile[objLastModDate]
107
- }
97
+ retrieved_profile = XmlSimple.xml_in(Fedora::Repository.instance.fetch_custom(self.pid, :profile))
98
+ profile_hash = Hash[:owner_id => retrieved_profile["objOwnerId"],
99
+ :label => retrieved_profile["objLabel"],
100
+ :create_date => retrieved_profile["objCreateDate"],
101
+ :modified_date => retrieved_profile["objLastModDate"]
102
+ ]
108
103
  end
109
104
 
110
- def objectXML
111
- # Use REXML to slurp the attributes (can't use xmlsimple because the XML is too complex. Need XPath-like queries.
112
- @objectXML ||= REXML::Document.new(@fedora.call_resource(:retrieve, :objects_objectXml, {:pid => @pid}))
105
+ def object_xml
106
+ Fedora::Repository.instance.fetch_custom(pid, :objectXML)
107
+ end
108
+
109
+ def self.object_xml(pid=pid)
110
+ Fedora::Repository.instance.fetch_custom(pid, :objectXML)
113
111
  end
114
112
 
115
113
  # See http://www.fedora.info/definitions/identifiers
116
114
  def uri
117
115
  "fedora:info/#{pid}"
118
116
  end
117
+
118
+ # @return [String] the URL of the object in Fedora, without the repository userinfo
119
+ def url
120
+ repo_url = Fedora::Repository.instance.fedora_url
121
+ return "#{repo_url.scheme}://#{repo_url.host}:#{repo_url.port}#{repo_url.path}/objects/#{pid}"
122
+ end
119
123
  end
@@ -0,0 +1,71 @@
1
+ #
2
+ # @Creator Matt Zumwalt, MediaShelf LLC
3
+ # @Copyright Matt Zumwalt, 2007. All Rights Reserved.
4
+ #
5
+
6
+ module Fedora
7
+ class GenericSearch
8
+
9
+ def initialize(uri, service_name)
10
+ @uri = "#{uri}/#{service_name}"
11
+ @client = HTTPClient.new(@uri)
12
+ @extheader = {'User-Agent'=>"RubyFedora"}
13
+ end
14
+
15
+ def call_resource
16
+
17
+ end
18
+
19
+ # TODO: Handle Ruby-style params, camel-casing them before passing to call_resource...
20
+ def update_index(params)
21
+ query = {:action=>action,:value=>value,:repositoryName=>repository_name,:indexName=>indexname,:restXslt=>"copyXML"}.merge(params)
22
+ query.merge {:operation> "updateIndex"}
23
+
24
+ @client.get(@uri, query, @extheader)
25
+ return "update_index Not Implemented."
26
+ end
27
+
28
+
29
+ def browse_index(params)
30
+ # Sample: /fedoragsearch/rest?operation=browseIndex&startTerm=&fieldName=PID&termPageSize=20&indexName=FedoraIndex&restXslt=copyXml&resultPageXslt=browseIndexToResultPage
31
+ query = {:startTerm=>URLEncoder.encode(start_term, "UTF-8"),:fieldName=>"",:indexName=>"",:termPageSize=>"", :restXslt=>"copyXml", :resultPageXslt=>""}.merge(params)
32
+ query.merge {:operation> "browseIndex"}
33
+ @client.get(@uri, query, @extheader)
34
+ return "browse_index Not Implemented."
35
+ end
36
+
37
+ def gfind_objects(params)
38
+ # Sample: /fedoragsearch/rest?operation=gfindObjects&query=test&hitPageSize=10&restXslt=copyXml
39
+ # fieldMaxLength limits the number of characters returned from the value of each object field.
40
+ # Snippets will highlight matched words within the search results. To keep the xml as simple as possible, set snippetsMax to 0.
41
+ query = {:query=>URLEncoder.encode(query, "UTF-8"),:value=>value,:indexName=>indexname,:hitPageStart=>"",:hitPageSize=>"",:snippetsMax=>"0",:fieldMaxLength=>"",:restXslt=>"copyXML",:resultPageXslt=>""}.merge(params)
42
+ query.merge {:operation> "gfindObjects"}
43
+ @client.get(@uri, query, @extheader)
44
+ return "gfind_objects Not Implemented."
45
+ end
46
+
47
+ def get_index_info(params)
48
+ # Sample: /fedoragsearch/rest?operation=getIndexInfo&restXslt=copyXml
49
+ query = {:indexName=>"",:restXslt=>"copyXml", :resultPageXslt=>""}.merge(params)
50
+ query.merge {:operation> "getIndexInfo"}
51
+ @client.get(@uri, query, @extheader)
52
+ return "get_index_info Not Implemented."
53
+ end
54
+
55
+ def get_repository_info(params)
56
+ # Sample: /fedoragsearch/rest?operation=getRepositoryInfo&restXslt=copyXml
57
+ query = {:repositoryName=>"",:restXslt=>"copyXml", :resultPageXslt=>""}.merge(params)
58
+ query.merge {:operation> "getRepositoryInfo"}
59
+ @client.get(@uri, query, @extheader)
60
+ return "get_repository_info Not Implemented."
61
+ end
62
+
63
+ def configure(params)
64
+ query = {:configName=>""}.merge(params)
65
+ query.merge {:operation> "configure"}
66
+ @client.get(@uri, query, @extheader)
67
+ return "configure Not Implemented."
68
+ end
69
+
70
+ end
71
+ end
@@ -15,6 +15,30 @@ module Fedora
15
15
  ]
16
16
 
17
17
  class Repository
18
+
19
+ attr_accessor :repository_name, :base_url, :fedora_version, :pid_namespace, :pid_delimiter
20
+
21
+ def self.flush
22
+ Thread.current[:repo]=nil
23
+ end
24
+ def self.register(url, surrogate=nil)
25
+ Thread.current[:repo]= Fedora::Repository.new(url, surrogate)
26
+ begin
27
+ repo = Thread.current[:repo]
28
+ attributes = repo.describe_repository
29
+ repo.repository_name = attributes["repositoryName"].first
30
+ repo.base_url = attributes["repositoryBaseURL"].first
31
+ repo.fedora_version = attributes["repositoryVersion"].first
32
+ repo.pid_namespace = attributes["repositoryPID"].first["PID-namespaceIdentifier"].first
33
+ repo.pid_delimiter = attributes["repositoryPID"].first["PID-delimiter"].first
34
+ rescue
35
+ end
36
+ Thread.current[:repo]
37
+ end
38
+ def self.instance
39
+ raise "did you register a repo?" unless Thread.current[:repo]
40
+ Thread.current[:repo]
41
+ end
18
42
  class StringResponse < String
19
43
  attr_reader :content_type
20
44
 
@@ -26,8 +50,9 @@ module Fedora
26
50
 
27
51
  attr_accessor :fedora_url
28
52
 
29
- def initialize(fedora_url = "http://localhost:8080/fedora")
53
+ def initialize(fedora_url, surrogate=nil)
30
54
  @fedora_url = fedora_url.is_a?(URI) ? fedora_url : URI.parse(fedora_url)
55
+ @surrogate = surrogate
31
56
  @connection = nil
32
57
  end
33
58
 
@@ -37,6 +62,8 @@ module Fedora
37
62
  StringResponse.new(response.body, response.content_type)
38
63
  end
39
64
 
65
+
66
+
40
67
  # Find fedora objects with http://www.fedora.info/wiki/index.php/API-A-Lite_findObjects
41
68
  #
42
69
  # == Parameters
@@ -69,6 +96,11 @@ module Fedora
69
96
 
70
97
  convert_xml(connection.get("#{fedora_url.path}/objects?#{params.to_query}#{includes}"))
71
98
  end
99
+ def find_model(pid, klazz)
100
+ obj = self.find_objects("pid=#{pid}").first
101
+ doc = REXML::Document.new(obj.object_xml, :ignore_whitespace_nodes=>:all)
102
+ klazz.deserialize(doc)
103
+ end
72
104
 
73
105
  # Create the given object if it's new (not obtained from a find method). Otherwise update the object.
74
106
  #
@@ -78,6 +110,12 @@ module Fedora
78
110
  def save(object)
79
111
  object.new_object? ? create(object) : update(object)
80
112
  end
113
+
114
+ def nextid
115
+ d = REXML::Document.new(connection.post(fedora_url.path+"/management/getNextPID?xml=true").body)
116
+ d.elements['//pid'].text
117
+ end
118
+
81
119
 
82
120
  def create(object)
83
121
  case object
@@ -139,7 +177,7 @@ module Fedora
139
177
  def delete(object)
140
178
  raise ArgumentError, "Object must not be nil" if object.nil?
141
179
  response = connection.delete("#{url_for(object)}")
142
- response.code == '200'
180
+ response.code == '200' or response.code == '204' # Temporary hack around error in Fedora 3.0 Final's REST API
143
181
  end
144
182
 
145
183
  # Fetch the given object using custom method. This is used to fetch other aspects of a fedora object,
@@ -162,6 +200,11 @@ module Fedora
162
200
  connection.raw_get("#{url_for(object)}#{path}?#{extra_params.to_query}").body
163
201
  end
164
202
 
203
+ def describe_repository
204
+ result_body = connection.raw_get("#{fedora_url.path}/describe?xml=true").body
205
+ XmlSimple.xml_in(result_body)
206
+ end
207
+
165
208
  private
166
209
  def convert_xml(response)
167
210
  results = FedoraObjects.new
@@ -175,6 +218,7 @@ module Fedora
175
218
  when Hash
176
219
  results << FedoraObject.new(objectFields.symbolize_keys!)
177
220
  end
221
+ results.each {|result| result.new_object = false}
178
222
  results
179
223
  end
180
224
 
@@ -189,7 +233,7 @@ module Fedora
189
233
  # or not (defaults to +false+).
190
234
  def connection(refresh = false)
191
235
  if refresh || @connection.nil?
192
- @connection = RubyFedora::Connection.new(@fedora_url, Fedora::XmlFormat)
236
+ @connection = Fedora::Connection.new(@fedora_url, Fedora::XmlFormat, @surrogate)
193
237
  end
194
238
  @connection
195
239
  end
data/lib/ruby-fedora.rb CHANGED
@@ -1,17 +1,18 @@
1
1
  $LOAD_PATH.unshift File.dirname(__FILE__) unless
2
- $LOAD_PATH.include?(File.dirname(__FILE__)) ||
3
- $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
+ $LOAD_PATH.include?(File.dirname(__FILE__)) || $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
4
3
 
5
- module RubyFedora #:nodoc:
4
+ module Fedora #:nodoc:
6
5
  module VERSION #:nodoc:
7
- MAJOR = 0
8
- MINOR = 1
9
- TINY = 2
6
+ unless defined? MAJOR
7
+ MAJOR = 0
8
+ MINOR = 9
9
+ TINY = 0
10
10
 
11
- STRING = [MAJOR, MINOR, TINY].join('.')
11
+ STRING = [MAJOR, MINOR, TINY].join('.')
12
+ end
12
13
  end
13
14
  end
14
15
 
15
16
  module Fedora; end
16
17
 
17
- require 'fedora/repository'
18
+ require 'fedora/repository'
@@ -0,0 +1,23 @@
1
+ module MediaShelfClassLevelInheritableAttributes
2
+ def self.included(base)
3
+ base.extend(MSClassMethods)
4
+ end
5
+ module MSClassMethods
6
+ def ms_inheritable_attributes(*args)
7
+ @ms_inheritable_attributes ||=[:ms_inheritable_attributes]
8
+ @ms_inheritable_attributes+=args
9
+ args.each do |arg|
10
+ class_eval %(
11
+ class <<self;attr_accessor :#{arg} end
12
+ )
13
+ end
14
+ @ms_inheritable_attributes
15
+ end
16
+ def inherited(subclass)
17
+ @ms_inheritable_attributes.each do |attrib|
18
+ instance_var = "@#{attrib}"
19
+ subclass.instance_variable_set(instance_var, instance_variable_get(instance_var))
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,229 @@
1
+ <?xml version="1.0" ?>
2
+ <!--
3
+ Licensed to the Apache Software Foundation (ASF) under one or more
4
+ contributor license agreements. See the NOTICE file distributed with
5
+ this work for additional information regarding copyright ownership.
6
+ The ASF licenses this file to You under the Apache License, Version 2.0
7
+ (the "License"); you may not use this file except in compliance with
8
+ the License. You may obtain a copy of the License at
9
+
10
+ http://www.apache.org/licenses/LICENSE-2.0
11
+
12
+ Unless required by applicable law or agreed to in writing, software
13
+ distributed under the License is distributed on an "AS IS" BASIS,
14
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ See the License for the specific language governing permissions and
16
+ limitations under the License.
17
+ -->
18
+
19
+ <!-- This is the Solr schema file. This file should be named "schema.xml" and
20
+ should be in the conf directory under the solr home
21
+ (i.e. ./solr/conf/schema.xml by default)
22
+ or located where the classloader for the Solr webapp can find it.
23
+
24
+ For more information on how to customize this file, please see
25
+ http://wiki.apache.org/solr/SchemaXml
26
+ -->
27
+
28
+ <schema name="active_fedora" version="0.1">
29
+ <!-- attribute "name" is the name of this schema and is only used for display purposes.
30
+ Applications should change this to reflect the nature of the search collection.
31
+ version="1.1" is Solr's version number for the schema syntax and semantics. It should
32
+ not normally be changed by applications.
33
+ 1.0: multiValued attribute did not exist, all fields are multiValued by nature
34
+ 1.1: multiValued attribute introduced, false by default -->
35
+
36
+ <types>
37
+ <!-- field type definitions. The "name" attribute is
38
+ just a label to be used by field definitions. The "class"
39
+ attribute and any other attributes determine the real
40
+ behavior of the fieldtype.
41
+ Class names starting with "solr" refer to java classes in the
42
+ org.apache.solr.analysis package.
43
+ -->
44
+
45
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
46
+ - StrField and TextField support an optional compressThreshold which
47
+ limits compression (if enabled in the derived fields) to values which
48
+ exceed a certain size (in characters).
49
+ -->
50
+ <fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
51
+
52
+ <!-- boolean type: "true" or "false" -->
53
+ <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
54
+
55
+ <!-- The optional sortMissingLast and sortMissingFirst attributes are
56
+ currently supported on types that are sorted internally as strings.
57
+ - If sortMissingLast="true", then a sort on this field will cause documents
58
+ without the field to come after documents with the field,
59
+ regardless of the requested sort order (asc or desc).
60
+ - If sortMissingFirst="true", then a sort on this field will cause documents
61
+ without the field to come before documents with the field,
62
+ regardless of the requested sort order.
63
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
64
+ then default lucene sorting will be used which places docs without the
65
+ field first in an ascending sort and last in a descending sort.
66
+ -->
67
+
68
+
69
+ <!-- numeric field types that store and index the text
70
+ value verbatim (and hence don't support range queries, since the
71
+ lexicographic ordering isn't equal to the numeric ordering) -->
72
+ <fieldtype name="integer" class="solr.IntField" omitNorms="true"/>
73
+ <fieldtype name="long" class="solr.LongField" omitNorms="true"/>
74
+ <fieldtype name="float" class="solr.FloatField" omitNorms="true"/>
75
+ <fieldtype name="double" class="solr.DoubleField" omitNorms="true"/>
76
+
77
+
78
+ <!-- Numeric field types that manipulate the value into
79
+ a string value that isn't human-readable in its internal form,
80
+ but with a lexicographic ordering the same as the numeric ordering,
81
+ so that range queries work correctly. -->
82
+ <fieldtype name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
83
+ <fieldtype name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
84
+ <fieldtype name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
85
+ <fieldtype name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
86
+
87
+
88
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
89
+ is a more restricted form of the canonical representation of dateTime
90
+ http://www.w3.org/TR/xmlschema-2/#dateTime
91
+ The trailing "Z" designates UTC time and is mandatory.
92
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
93
+ All other components are mandatory.
94
+
95
+ Expressions can also be used to denote calculations that should be
96
+ performed relative to "NOW" to determine the value, ie...
97
+
98
+ NOW/HOUR
99
+ ... Round to the start of the current hour
100
+ NOW-1DAY
101
+ ... Exactly 1 day prior to now
102
+ NOW/DAY+6MONTHS+3DAYS
103
+ ... 6 months and 3 days in the future from the start of
104
+ the current day
105
+
106
+ Consult the DateField javadocs for more information.
107
+ -->
108
+ <fieldtype name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
109
+
110
+ <!-- solr.TextField allows the specification of custom text analyzers
111
+ specified as a tokenizer and a list of token filters. Different
112
+ analyzers may be specified for indexing and querying.
113
+
114
+ The optional positionIncrementGap puts space between multiple fields of
115
+ this type on the same document, with the purpose of preventing false phrase
116
+ matching across fields.
117
+
118
+ For more info on customizing your analyzer chain, please see
119
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
120
+ -->
121
+
122
+ <!-- One can also specify an existing Analyzer class that has a
123
+ default constructor via the class attribute on the analyzer element
124
+ <fieldtype name="text_greek" class="solr.TextField">
125
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
126
+ </fieldType>
127
+ -->
128
+
129
+ <!-- A text field that only splits on whitespace for exact matching of words -->
130
+ <fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="100">
131
+ <analyzer>
132
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
133
+ </analyzer>
134
+ </fieldtype>
135
+
136
+ <fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
137
+ <analyzer>
138
+ <tokenizer class="solr.StandardTokenizerFactory"/>
139
+ <filter class="solr.StandardFilterFactory"/>
140
+ <filter class="solr.LowerCaseFilterFactory"/>
141
+ </analyzer>
142
+ </fieldtype>
143
+
144
+ <fieldtype name="text_zh" class="solr.TextField">
145
+ <analyzer class="org.apache.lucene.analysis.cn.ChineseAnalyzer"/>
146
+ </fieldtype>
147
+
148
+
149
+ <!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
150
+ but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
151
+ <fieldtype name="textTight" class="solr.TextField" positionIncrementGap="100" >
152
+ <analyzer>
153
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
154
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
155
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
156
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
157
+ <filter class="solr.LowerCaseFilterFactory"/>
158
+ <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
159
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
160
+ </analyzer>
161
+ </fieldtype>
162
+
163
+ </types>
164
+
165
+
166
+ <fields>
167
+ <!-- Valid attributes for fields:
168
+ name: mandatory - the name for the field
169
+ type: mandatory - the name of a previously defined type from the <types> section
170
+ indexed: true if this field should be indexed (searchable or sortable)
171
+ stored: true if this field should be retrievable
172
+ compressed: [false] if this field should be stored using gzip compression
173
+ (this will only apply if the field type is compressible; among
174
+ the standard field types, only TextField and StrField are)
175
+ multiValued: true if this field may contain multiple values per document
176
+ omitNorms: (expert) set to true to omit the norms associated with
177
+ this field (this disables length normalization and index-time
178
+ boosting for the field, and saves some memory). Only full-text
179
+ fields or fields that need an index-time boost need norms.
180
+ -->
181
+
182
+ <field name="id" type="string" indexed="true" stored="true"/>
183
+
184
+ <!-- catchall field, containing all other searchable text fields (implemented
185
+ via copyField further on in this schema) -->
186
+ <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
187
+ <!-- catchall date field, containing the values of all *_date fields (implemented
188
+ via copyField further on in this schema) -->
189
+ <field name="date" type="date" indexed="true" stored="false" multiValued="true"/>
190
+
191
+ <!-- Dynamic field definitions. If a field name is not found, dynamicFields
192
+ will be used if the name matches any of the patterns.
193
+ RESTRICTION: the glob-like pattern in the name attribute must have
194
+ a "*" only at the start or the end.
195
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
196
+ Longer patterns will be matched first. If equal-size patterns
197
+ both match, the first appearing in the schema will be used. -->
198
+ <dynamicField name="*_date" type="date" indexed="true" stored="true" multiValued="true"/>
199
+ <dynamicField name="*_name" type="date" indexed="true" stored="true" multiValued="true"/>
200
+ <dynamicField name="*_field" type="text" indexed="true" stored="true" multiValued="true"/>
201
+
202
+ <dynamicField name="*_facet" type="string" indexed="true" stored="true" multiValued="true"/>
203
+ <dynamicField name="*_zh_text" type="text_zh" indexed="true" stored="true" multiValued="true"/>
204
+ <dynamicField name="*_text" type="text" indexed="true" stored="true" multiValued="true"/>
205
+ <dynamicField name="*_display" type="text" indexed="false" stored="true" multiValued="true"/>
206
+ </fields>
207
+
208
+ <!-- field to use to determine and enforce document uniqueness. -->
209
+ <uniqueKey>id</uniqueKey>
210
+
211
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
212
+ <defaultSearchField>text</defaultSearchField>
213
+
214
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
215
+ <solrQueryParser defaultOperator="AND"/>
216
+
217
+ <!-- copyField commands copy one field to another at the time a document
218
+ is added to the index. It's used either to index the same field differently,
219
+ or to add multiple fields to the same field for easier/faster searching. -->
220
+ <copyField source="*_date" dest="date"/>
221
+ <copyField source="*_text" dest="text"/>
222
+ <copyField source="*_facet" dest="text"/>
223
+
224
+ <!-- Similarity is the scoring routine for each document vs. a query.
225
+ A custom similarity may be specified here, but the default is fine
226
+ for most applications. -->
227
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
228
+
229
+ </schema>