spotlight-dor-resources 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.hound.yml +2 -0
- data/.rubocop.yml +8 -0
- data/.rubocop_todo.yml +191 -0
- data/.travis.yml +17 -0
- data/Gemfile +32 -0
- data/LICENSE.txt +13 -0
- data/README.md +65 -0
- data/Rakefile +48 -0
- data/app/models/spotlight/resources/dor_resource.rb +17 -0
- data/app/models/spotlight/resources/harvestdor.rb +4 -0
- data/app/models/spotlight/resources/purl.rb +14 -0
- data/app/models/spotlight/resources/searchworks.rb +15 -0
- data/lib/spotlight/dor/indexer.rb +160 -0
- data/lib/spotlight/dor/resources.rb +16 -0
- data/lib/spotlight/dor/resources/engine.rb +13 -0
- data/lib/spotlight/dor/resources/version.rb +7 -0
- data/solr_conf/conf/schema.xml +346 -0
- data/solr_conf/conf/solrconfig.xml +180 -0
- data/spec/integration/gdor_integration_spec.rb +30 -0
- data/spec/integration/indexer_integration_spec.rb +28 -0
- data/spec/models/spotlight/resources/purl_spec.rb +115 -0
- data/spec/models/spotlight/resources/searchworks_spec.rb +91 -0
- data/spec/spec_helper.rb +60 -0
- data/spec/test_app_templates/catalog_controller.rb +96 -0
- data/spec/test_app_templates/gdor.yml +9 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
- data/spec/unit/spotlight/dor/indexer_spec.rb +200 -0
- data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_a_doc_id.yml +1201 -0
- data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_exhibit-specific_indexing.yml +1003 -0
- data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_spotlight_data.yml +1003 -0
- data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_the_gdor_data.yml +1003 -0
- data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/no_donor_tags_ssim_field_in_solr_doc_when_note_displayLabel_Donor_tags_not_in_MODS.yml +1382 -0
- data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/solr_doc_has_donor_tags_ssim_field_when_note_displayLabel_Donor_tags_is_in_MODS.yml +1602 -0
- data/spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml +6822 -0
- data/spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml +1390 -0
- data/spotlight-dor-resources.gemspec +37 -0
- metadata +336 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
module Spotlight::Resources
  # Resource provider for items addressed by a purl.stanford.edu URL.
  class Purl < Spotlight::Resources::DorResource
    # NOTE(review): presumably orders this provider relative to others; the
    # meaning of the value is defined by the superclass -- confirm there.
    self.weight = -1000

    # Whether +res+ points at purl.stanford.edu (http or https).
    #
    # The pattern is anchored with \A (start of string, not start of any line)
    # and the dots of the host name are escaped, so neither a multi-line string
    # nor a look-alike host such as "purlXstanford.edu" is accepted.
    #
    # @param res [#url] object exposing the candidate URL
    # @return [Boolean]
    def self.can_provide?(res)
      !!(res.url =~ %r{\Ahttps?://purl\.stanford\.edu})
    end

    # The first path segment of the PURL, up to any "#", "/" or ".".
    #
    # @return [String]
    # @raise [NoMethodError] when the URL has no such segment (match is nil)
    def doc_id
      url.match(%r{\Ahttps?://purl\.stanford\.edu/([^#/.]+)})[1]
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Spotlight::Resources
  # Resource provider for items addressed by a SearchWorks URL
  # (searchworks*.stanford.edu).
  class Searchworks < Spotlight::Resources::DorResource
    # NOTE(review): presumably orders this provider relative to others; the
    # meaning of the value is defined by the superclass -- confirm there.
    self.weight = -1000

    # Whether +res+ points at a searchworks*.stanford.edu host (http or https).
    #
    # The pattern is anchored with \A (start of string, not start of any line)
    # and the dots of the host name are escaped, so neither a multi-line string
    # nor a look-alike host such as "searchworksXstanfordYedu" is accepted.
    #
    # @param res [#url] object exposing the candidate URL
    # @return [Boolean]
    def self.can_provide?(res)
      !!(res.url =~ %r{\Ahttps?://searchworks[^.]*\.stanford\.edu})
    end

    # The path segment following "view/", up to any "/", "." or "#".
    #
    # @return [String]
    # @raise [NoMethodError] when the URL has no "view/<id>" part (match is nil)
    def doc_id
      url.match(%r{\Ahttps?://searchworks[^.]*\.stanford\.edu/.*view/([^/.#]+)})[1]
    end
  end
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
# external gems
|
2
|
+
require 'gdor/indexer'
|
3
|
+
require 'solrizer'
|
4
|
+
# Base class to harvest from DOR via harvestdor gem
|
5
|
+
module Spotlight::Dor
|
6
|
+
class Indexer < GDor::Indexer
|
7
|
+
# add contentMetadata fields: the contentMetadata type plus, for every JP2 file,
# the IIIF info URL and the stacks thumbnail / large / full image URLs.
# The first JP2 encountered also supplies the "first image" name and dimensions.
before_index do |sdb, solr_doc|
  content_type = sdb.public_xml.xpath("/publicObject/contentMetadata/@type").text
  Solrizer.insert_field(solr_doc, 'content_metadata_type', content_type, :symbol, :displayable)

  jp2_files = sdb.public_xml.xpath("/publicObject/contentMetadata").xpath('resource/file[@mimetype="image/jp2"]')
  jp2_files.each do |jp2|
    file_id = jp2.attr('id').gsub(".jp2", '')

    # record the first-image fields only while none has been stored yet
    if jp2.attr('id') =~ /jp2$/ && !solr_doc[Solrizer.solr_name('content_metadata_first_image_file_name', :displayable)]
      width = jp2.xpath('./imageData/@width').text
      height = jp2.xpath('./imageData/@height').text
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', width, :displayable)
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', height, :displayable)
    end

    Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "https://stacks.stanford.edu/image/iiif/#{solr_doc[:id]}%2F#{file_id}/info.json", :displayable)
    Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_square", :displayable)
    Solrizer.insert_field(solr_doc, 'thumbnail_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_thumb", :displayable)
    Solrizer.insert_field(solr_doc, 'large_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_large", :displayable)
    Solrizer.insert_field(solr_doc, 'full_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_full", :displayable)
  end
end
|
27
|
+
|
28
|
+
# tweak author_sort field from stanford-mods:
# when a sort key is present, replace every U+FFFF in it with U+FFFD
before_index do |_sdb, solr_doc|
  author_sort = solr_doc[:author_sort]
  solr_doc[:author_sort] = author_sort.gsub("\uFFFF", "\uFFFD") if author_sort
end
|
32
|
+
|
33
|
+
# add fields from raw mods, registered in this order
# see comment with add_donor_tags about Feigenbaum specific donor tags data
%i[add_box add_donor_tags add_genre add_folder add_series mods_cartographics_indexing].each do |hook_method|
  before_index hook_method
end
|
41
|
+
|
42
|
+
# Reader for the Solr client held in the @solr_client instance variable.
#
# @return [Object, nil] whatever @solr_client currently holds
def solr_client
  @solr_client
end
|
45
|
+
|
46
|
+
# Build the Solr document hash for +resource+ through the superclass, then run
# every registered before_index hook against it.
#
# @param resource the object being indexed; passed on to each hook
# @return the document hash produced by super, after the hooks have run
def solr_document(resource)
  super.tap { |doc_hash| run_hook :before_index, resource, doc_hash }
end
|
51
|
+
|
52
|
+
# Wrap +druid+ in a Harvestdor::Indexer::Resource bound to this indexer's
# harvestdor object.
#
# @param druid identifier of the object to wrap
# @return [Harvestdor::Indexer::Resource]
def resource(druid)
  Harvestdor::Indexer::Resource.new(harvestdor, druid)
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# add the box number to solr_doc as box_ssi field (note: single valued!)
# data in location/physicalLocation or in relatedItem/location/physicalLocation
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
#
# Locations without a box number are ignored, so the value comes from the first
# physicalLocation that actually names a box; box_ssi is left untouched when
# none does.
#
# @param sdb object whose smods_rec exposes _location.physicalLocation nodes
# @param solr_doc [Hash] document hash, modified in place
def add_box(sdb, solr_doc)
  # see spec for data from actual collections
  # _location.physicalLocation should find top level and relatedItem
  candidates = sdb.smods_rec._location.physicalLocation.map do |node|
    # note that this will also find Flatbox or Flat-box
    # NOTE(review): [^,|(Folder)] is a character class, so the capture stops at
    # any of , | ( ) or the letters f o l d e r (case-insensitive), not only at
    # the word "Folder" -- confirm against real data before changing it.
    match_data = node.text.match(/Box ?:? ?([^,|(Folder)]+)/i)
    match_data[1].strip if match_data
  end
  # drop the nils left by non-matching locations so a later match is not masked
  box_numbers = candidates.compact
  solr_doc['box_ssi'] = box_numbers.first unless box_numbers.empty?
end
|
72
|
+
|
73
|
+
# The donor_tags field was added in October 2015 specifically for the Feigenbaum exhibit. It is very likely
# to go unused by other projects, but should be benign (nothing is inserted when no MODS note carries the
# 'Donor tags' displayLabel.)
# Later refactoring could include project specific fields. Peter Mangiafico
#
# @param sdb object whose smods_rec exposes the MODS note elements
# @param solr_doc [Hash] document hash, modified in place
def add_donor_tags(sdb, solr_doc)
  donor_notes = sdb.smods_rec.note.select { |note| note.displayLabel == 'Donor tags' }
  tag_values = donor_notes.map(&:content)
  insert_field solr_doc, 'donor_tags', tag_values, :symbol # this is a _ssim field
end
|
80
|
+
|
81
|
+
# add the folder number to solr_doc as folder_ssi field (note: single valued!)
# data in location/physicalLocation or in relatedItem/location/physicalLocation
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
#
# Locations without a folder are ignored, so the value comes from the first
# physicalLocation that actually names a folder; folder_ssi is left untouched
# when none does.
#
# @param sdb object whose smods_rec exposes _location.physicalLocation nodes
# @param solr_doc [Hash] document hash, modified in place
def add_folder(sdb, solr_doc)
  # see spec for data from actual collections
  # _location.physicalLocation should find top level and relatedItem
  candidates = sdb.smods_rec._location.physicalLocation.map do |node|
    # folder may be text with commas
    match_data = node.text.match(/Folder:? ?(.+)/i)
    next unless match_data

    folder = match_data[1].strip
    # Menuez collection may have folder followed by Sleeve then Frame
    sleeve_match = folder.match(/(.*),? ?Sleeve/i)
    # keep only the text before "Sleeve", minus a trailing comma
    sleeve_match ? sleeve_match[1].strip.sub(/,$/, '') : folder
  end
  # drop the nils left by non-matching locations so a later match is not masked
  folders = candidates.compact
  solr_doc['folder_ssi'] = folders.first unless folders.empty?
end
|
103
|
+
|
104
|
+
# add plain MODS <genre> element data, not the SearchWorks genre values
#
# @param sdb object whose smods_rec exposes the MODS genre elements
# @param solr_doc [Hash] document hash, modified in place
def add_genre(sdb, solr_doc)
  genre_values = sdb.smods_rec.genre.content
  insert_field solr_doc, 'genre', genre_values, :symbol # this is a _ssim field
end
|
108
|
+
|
109
|
+
# add the series/accession 'number' to solr_doc as series_ssi field (note: single valued!)
# data in location/physicalLocation or in relatedItem/location/physicalLocation
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
#
# Locations without a series/accession are ignored, so the value comes from the
# first physicalLocation that actually names one; series_ssi is left untouched
# when none does.
#
# @param sdb object whose smods_rec exposes _location.physicalLocation nodes
# @param solr_doc [Hash] document hash, modified in place
def add_series(sdb, solr_doc)
  # see spec for data from actual collections
  # _location.physicalLocation should find top level and relatedItem
  candidates = sdb.smods_rec._location.physicalLocation.map do |node|
    # feigenbaum uses 'Accession'
    match_data = node.text.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
    match_data[1].strip if match_data
  end
  # drop the nils left by non-matching locations so a later match is not masked
  series_numbers = candidates.compact
  solr_doc['series_ssi'] = series_numbers.first unless series_numbers.empty?
end
|
123
|
+
|
124
|
+
# Index MODS subject/cartographics/coordinates data.
#
# Every coordinates value is inserted verbatim into the "coordinates" field
# (:stored_searchable). Values wrapped in parentheses are additionally parsed as
# "(minX--maxX/maxY--minY)" and appended to solr_doc["point_bbox"] as the string
# "minX minY maxX maxY". Parenthesised values that lack the "/" separator or one
# side of a "--" range are skipped for point_bbox instead of raising or
# producing a string with blank components.
#
# @param sdb object whose smods_rec exposes subject.cartographics.coordinates
# @param solr_doc [Hash] document hash, modified in place
def mods_cartographics_indexing(sdb, solr_doc)
  # read the node texts once and reuse them for both fields
  coordinates = Array(sdb.smods_rec.subject.cartographics.coordinates).map(&:text)
  insert_field(solr_doc, "coordinates", coordinates, :stored_searchable)

  coordinates.each do |text|
    next unless text =~ /^\(/ && text =~ /\)$/

    lng, lat = text.gsub(/[\(\)]/, '').split('/')
    next unless lng && lat

    min_x, max_x = lng.split('--').map { |x| coord_to_decimal(x) }
    max_y, min_y = lat.split('--').map { |y| coord_to_decimal(y) }
    next if [min_x, min_y, max_x, max_y].any?(&:nil?)

    (solr_doc["point_bbox"] ||= []) << "#{min_x} #{min_y} #{max_x} #{max_y}"
  end
end
|
141
|
+
|
142
|
+
# Convert one coordinate such as "W 123°23ʹ" into signed decimal degrees.
#
# The number before ° is taken as whole degrees; the optional ʹ-suffixed number
# (captured as `sec`, presumably arc-minutes) is divided by 60 and added.
# W and S directions give a negative result.
#
# @param point [String] text containing a direction letter and degrees
# @return [Float]
# @raise [NoMethodError] when +point+ does not match the pattern (match is nil)
def coord_to_decimal(point)
  parts = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/.match(point)
  magnitude = parts['deg'].to_i + parts['sec'].to_f / 60
  %w[W S].include?(parts['dir']) ? -1 * magnitude : magnitude
end
|
153
|
+
|
154
|
+
# Insert each of +values+ into +solr_doc+ under +field+ with Solrizer.insert_field.
# +values+ may be nil, a single value or a list (it is coerced with Array()).
#
# @param solr_doc [Hash] document hash handed to Solrizer
# @param field [String] base field name
# @param values [Object, Array, nil] value(s) to insert
# @param args extra arguments forwarded unchanged to Solrizer.insert_field
def insert_field(solr_doc, field, values, *args)
  Array(values).each { |value| Solrizer.insert_field(solr_doc, field, value, *args) }
end
|
159
|
+
end
|
160
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require "harvestdor-indexer"
|
2
|
+
require "spotlight/dor/resources/version"
|
3
|
+
|
4
|
+
module Spotlight
  module Dor
    # Namespace of the gem; loads the indexer and the Rails engine and exposes a
    # shared indexer instance.
    module Resources
      require "spotlight/dor/indexer"
      require "spotlight/dor/resources/engine"

      # The indexer, built on first use from config/gdor.yml under Rails.root and
      # the Blacklight Solr configuration, then memoized in @indexer.
      #
      # @return [Spotlight::Dor::Indexer]
      def self.indexer
        @indexer ||= begin
          gdor_config = File.join(Rails.root, "config", "gdor.yml")
          Spotlight::Dor::Indexer.new(gdor_config, solr: Blacklight.solr_config)
        end
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'spotlight/engine'
|
2
|
+
require 'spotlight/dor/resources'
|
3
|
+
|
4
|
+
module Spotlight::Dor::Resources
  # Rails engine for the gem. Its initializer appends the Searchworks and Purl
  # resource classes to Spotlight's resource_providers and sets this engine's
  # parallel_options to a single thread.
  class Engine < ::Rails::Engine
    initializer "spotlight.dor.initialize" do
      [Spotlight::Resources::Searchworks, Spotlight::Resources::Purl].each do |provider|
        Spotlight::Engine.config.resource_providers << provider
      end
      Spotlight::Dor::Resources::Engine.config.parallel_options = { in_threads: 1 }
    end
  end
end
|
@@ -0,0 +1,346 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<schema name="Hydra" version="1.5">
|
3
|
+
<!-- NOTE: various comments and unused configuration possibilities have been purged
|
4
|
+
from this file. Please refer to http://wiki.apache.org/solr/SchemaXml,
|
5
|
+
as well as the default schema file included with Solr -->
|
6
|
+
|
7
|
+
<uniqueKey>id</uniqueKey>
|
8
|
+
|
9
|
+
<fields>
|
10
|
+
<field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
|
11
|
+
<field name="_version_" type="long" indexed="true" stored="true"/>
|
12
|
+
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
13
|
+
|
14
|
+
<field name="lat" type="tdouble" stored="true" indexed="true" multiValued="false"/>
|
15
|
+
<field name="lng" type="tdouble" stored="true" indexed="true" multiValued="false"/>
|
16
|
+
<field name="original_pid_tesim" type="pid_text" stored="true" indexed="true" multiValued="true"/>
|
17
|
+
|
18
|
+
<field name="full_title_ng" type="text_en_ng" stored="false" indexed="true" multiValued="true"/>
|
19
|
+
<field name="id_ng" type="text_en_ng" stored="false" indexed="true" multiValued="false"/>
|
20
|
+
|
21
|
+
<!-- NOTE: not all possible Solr field types are represented in the dynamic fields -->
|
22
|
+
|
23
|
+
<!-- text (_t...) -->
|
24
|
+
<!--
|
25
|
+
<dynamicField name="*_ti" type="text" stored="false" indexed="true" multiValued="false"/>
|
26
|
+
<dynamicField name="*_tim" type="text" stored="false" indexed="true" multiValued="true"/>
|
27
|
+
-->
|
28
|
+
<dynamicField name="*_ts" type="text" stored="true" indexed="false" multiValued="false"/>
|
29
|
+
<dynamicField name="*_tsm" type="text" stored="true" indexed="false" multiValued="true"/>
|
30
|
+
<dynamicField name="*_tsi" type="text" stored="true" indexed="true" multiValued="false"/>
|
31
|
+
<dynamicField name="*_tsim" type="text" stored="true" indexed="true" multiValued="true"/>
|
32
|
+
<!--
|
33
|
+
<dynamicField name="*_tiv" type="text" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
|
34
|
+
<dynamicField name="*_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
35
|
+
-->
|
36
|
+
<dynamicField name="*_tsiv" type="text" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
|
37
|
+
<dynamicField name="*_tsimv" type="text" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
38
|
+
|
39
|
+
<!-- English text (_te...) -->
|
40
|
+
<!--
|
41
|
+
<dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
|
42
|
+
<dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
|
43
|
+
-->
|
44
|
+
<dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
|
45
|
+
<dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
|
46
|
+
<dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
|
47
|
+
<dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
|
48
|
+
<!--
|
49
|
+
<dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
|
50
|
+
<dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
51
|
+
-->
|
52
|
+
<dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
|
53
|
+
<dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
54
|
+
|
55
|
+
<!-- string (_s...) -->
|
56
|
+
<!--
|
57
|
+
<dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
|
58
|
+
<dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
|
59
|
+
-->
|
60
|
+
<dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
|
61
|
+
<dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
|
62
|
+
<dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
|
63
|
+
<dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
|
64
|
+
<!--
|
65
|
+
<dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false"/>
|
66
|
+
-->
|
67
|
+
|
68
|
+
<!-- integer (_i...) -->
|
69
|
+
<!--
|
70
|
+
<dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
|
71
|
+
<dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
|
72
|
+
-->
|
73
|
+
<dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
|
74
|
+
<dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
|
75
|
+
<dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
|
76
|
+
<dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
|
77
|
+
|
78
|
+
<!-- trie integer (_it...) (for faster range queries) -->
|
79
|
+
<!--
|
80
|
+
<dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
|
81
|
+
<dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
|
82
|
+
-->
|
83
|
+
<dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
|
84
|
+
<dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
|
85
|
+
<dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
|
86
|
+
<dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
|
87
|
+
|
88
|
+
<!-- date (_dt...) -->
|
89
|
+
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z
|
90
|
+
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
|
91
|
+
<!--
|
92
|
+
<dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
|
93
|
+
<dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
|
94
|
+
-->
|
95
|
+
<dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
|
96
|
+
<dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
|
97
|
+
<dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
|
98
|
+
<dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
|
99
|
+
|
100
|
+
<!-- trie date (_dtt...) (for faster range queries) -->
|
101
|
+
<!--
|
102
|
+
<dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
|
103
|
+
<dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
|
104
|
+
-->
|
105
|
+
<dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
|
106
|
+
<dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
|
107
|
+
<dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
|
108
|
+
<dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
|
109
|
+
|
110
|
+
<!-- long (_l...) -->
|
111
|
+
<!--
|
112
|
+
<dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
|
113
|
+
<dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
|
114
|
+
-->
|
115
|
+
<dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
|
116
|
+
<dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
|
117
|
+
<dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
|
118
|
+
<dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
|
119
|
+
|
120
|
+
<!-- trie long (_lt...) (for faster range queries) -->
|
121
|
+
<!--
|
122
|
+
<dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
|
123
|
+
<dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
|
124
|
+
-->
|
125
|
+
<dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
|
126
|
+
<dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
|
127
|
+
<dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
|
128
|
+
<dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
|
129
|
+
|
130
|
+
<!-- double (_db...) -->
|
131
|
+
<!--
|
132
|
+
<dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
|
133
|
+
<dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
|
134
|
+
-->
|
135
|
+
<dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
|
136
|
+
<dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
|
137
|
+
<dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
|
138
|
+
<dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
|
139
|
+
|
140
|
+
<!-- trie double (_dbt...) (for faster range queries) -->
|
141
|
+
<!--
|
142
|
+
<dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
|
143
|
+
<dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
|
144
|
+
-->
|
145
|
+
<dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
|
146
|
+
<dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
|
147
|
+
<dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
|
148
|
+
<dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
|
149
|
+
|
150
|
+
<!-- float (_f...) -->
|
151
|
+
<!--
|
152
|
+
<dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
|
153
|
+
<dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
|
154
|
+
-->
|
155
|
+
<dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
|
156
|
+
<dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
|
157
|
+
<dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
|
158
|
+
<dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
|
159
|
+
|
160
|
+
<!-- trie float (_ft...) (for faster range queries) -->
|
161
|
+
<!--
|
162
|
+
<dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
|
163
|
+
<dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
|
164
|
+
-->
|
165
|
+
<dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
|
166
|
+
<dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
|
167
|
+
<dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
|
168
|
+
<dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
|
169
|
+
|
170
|
+
<!-- boolean (_b...) -->
|
171
|
+
<!--
|
172
|
+
<dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
|
173
|
+
-->
|
174
|
+
<dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
|
175
|
+
<dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
|
176
|
+
|
177
|
+
<!-- Type used to index the lat and lon components for the "location" FieldType -->
|
178
|
+
<!--
|
179
|
+
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
|
180
|
+
-->
|
181
|
+
|
182
|
+
<!-- location (_ll...) -->
|
183
|
+
<!--
|
184
|
+
<dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
|
185
|
+
<dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
|
186
|
+
-->
|
187
|
+
<dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
|
188
|
+
<dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
|
189
|
+
<dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
|
190
|
+
<dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
|
191
|
+
|
192
|
+
<!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
|
193
|
+
<field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
194
|
+
|
195
|
+
|
196
|
+
</fields>
|
197
|
+
|
198
|
+
|
199
|
+
<!-- Above, multiple source fields are copied to the [text] field.
|
200
|
+
Another way to map multiple source fields to the same
|
201
|
+
destination field is to use the dynamic field syntax.
|
202
|
+
copyField also supports a maxChars to copy setting. -->
|
203
|
+
|
204
|
+
<copyField source="id" dest="id_ng" maxChars="3000"/>
|
205
|
+
<copyField source="full_title_tesim" dest="full_title_ng" maxChars="3000"/>
|
206
|
+
<copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/>
|
207
|
+
|
208
|
+
<types>
|
209
|
+
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
|
210
|
+
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
211
|
+
<fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
|
212
|
+
|
213
|
+
<!-- Default numeric field types. -->
|
214
|
+
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
|
215
|
+
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
|
216
|
+
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
|
217
|
+
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
|
218
|
+
|
219
|
+
<!-- trie numeric field types for faster range queries -->
|
220
|
+
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
|
221
|
+
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
|
222
|
+
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
|
223
|
+
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
|
224
|
+
|
225
|
+
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z
|
226
|
+
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
227
|
+
-->
|
228
|
+
<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
|
229
|
+
<!-- A Trie based date field for faster date range queries and date faceting. -->
|
230
|
+
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
|
231
|
+
|
232
|
+
|
233
|
+
<!-- This point type indexes the coordinates as separate fields (subFields)
|
234
|
+
If subFieldType is defined, it references a type, and a dynamic field
|
235
|
+
definition is created matching *___<typename>. Alternately, if
|
236
|
+
subFieldSuffix is defined, that is used to create the subFields.
|
237
|
+
Example: if subFieldType="double", then the coordinates would be
|
238
|
+
indexed in fields myloc_0___double,myloc_1___double.
|
239
|
+
Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
240
|
+
in fields myloc_0_d,myloc_1_d
|
241
|
+
The subFields are an implementation detail of the fieldType, and end
|
242
|
+
users normally should not need to know about them.
|
243
|
+
-->
|
244
|
+
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
245
|
+
|
246
|
+
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
247
|
+
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
248
|
+
|
249
|
+
<!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
|
250
|
+
For more information about this and other Spatial fields new to Solr 4, see:
|
251
|
+
http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
|
252
|
+
-->
|
253
|
+
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
|
254
|
+
geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
|
255
|
+
|
256
|
+
<fieldType name="text" class="solr.TextField" omitNorms="false">
|
257
|
+
<analyzer>
|
258
|
+
<tokenizer class="solr.ICUTokenizerFactory"/>
|
259
|
+
<filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
|
260
|
+
<filter class="solr.TrimFilterFactory"/>
|
261
|
+
</analyzer>
|
262
|
+
</fieldType>
|
263
|
+
|
264
|
+
<!-- A text field that only splits on whitespace for exact matching of words -->
|
265
|
+
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
266
|
+
<analyzer>
|
267
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
268
|
+
<filter class="solr.TrimFilterFactory"/>
|
269
|
+
</analyzer>
|
270
|
+
</fieldType>
|
271
|
+
|
272
|
+
<!-- single token analyzed text, for sorting. Punctuation is significant. -->
|
273
|
+
<fieldtype name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
274
|
+
<analyzer>
|
275
|
+
<tokenizer class="solr.KeywordTokenizerFactory" />
|
276
|
+
<filter class="solr.ICUFoldingFilterFactory"/>
|
277
|
+
<filter class="solr.TrimFilterFactory" />
|
278
|
+
</analyzer>
|
279
|
+
</fieldtype>
|
280
|
+
|
281
|
+
<!-- A text field with defaults appropriate for English -->
|
282
|
+
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
283
|
+
<analyzer>
|
284
|
+
<tokenizer class="solr.ICUTokenizerFactory"/>
|
285
|
+
<filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
|
286
|
+
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
287
|
+
<!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
|
288
|
+
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
289
|
+
<!--
|
290
|
+
<filter class="solr.PorterStemFilterFactory"/>
|
291
|
+
-->
|
292
|
+
<filter class="solr.TrimFilterFactory"/>
|
293
|
+
</analyzer>
|
294
|
+
</fieldType>
|
295
|
+
|
296
|
+
<!-- A text field with defaults appropriate for English and NGrams.
     The index analyzer adds front edge n-grams (3 to 15 characters); the query
     analyzer runs the same chain without the n-gram filter. -->
<fieldType name="text_en_ng" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
    <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="15" side="front"/>
  </analyzer>

  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
  </analyzer>
</fieldType>
|
317
|
+
|
318
|
+
<fieldType name="pid_text" class="solr.TextField" positionIncrementGap="100">
|
319
|
+
<analyzer>
|
320
|
+
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
321
|
+
</analyzer>
|
322
|
+
</fieldType>
|
323
|
+
|
324
|
+
<!-- queries for paths match documents at that path, or in descendent paths -->
|
325
|
+
<fieldType name="descendent_path" class="solr.TextField">
|
326
|
+
<analyzer type="index">
|
327
|
+
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
328
|
+
</analyzer>
|
329
|
+
<analyzer type="query">
|
330
|
+
<tokenizer class="solr.KeywordTokenizerFactory" />
|
331
|
+
</analyzer>
|
332
|
+
</fieldType>
|
333
|
+
|
334
|
+
<!-- queries for paths match documents at that path, or in ancestor paths -->
|
335
|
+
<fieldType name="ancestor_path" class="solr.TextField">
|
336
|
+
<analyzer type="index">
|
337
|
+
<tokenizer class="solr.KeywordTokenizerFactory" />
|
338
|
+
</analyzer>
|
339
|
+
<analyzer type="query">
|
340
|
+
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
341
|
+
</analyzer>
|
342
|
+
</fieldType>
|
343
|
+
|
344
|
+
</types>
|
345
|
+
|
346
|
+
</schema>
|