spotlight-dor-resources 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.hound.yml +2 -0
- data/.rubocop.yml +8 -0
- data/.rubocop_todo.yml +191 -0
- data/.travis.yml +17 -0
- data/Gemfile +32 -0
- data/LICENSE.txt +13 -0
- data/README.md +65 -0
- data/Rakefile +48 -0
- data/app/models/spotlight/resources/dor_resource.rb +17 -0
- data/app/models/spotlight/resources/harvestdor.rb +4 -0
- data/app/models/spotlight/resources/purl.rb +14 -0
- data/app/models/spotlight/resources/searchworks.rb +15 -0
- data/lib/spotlight/dor/indexer.rb +160 -0
- data/lib/spotlight/dor/resources.rb +16 -0
- data/lib/spotlight/dor/resources/engine.rb +13 -0
- data/lib/spotlight/dor/resources/version.rb +7 -0
- data/solr_conf/conf/schema.xml +346 -0
- data/solr_conf/conf/solrconfig.xml +180 -0
- data/spec/integration/gdor_integration_spec.rb +30 -0
- data/spec/integration/indexer_integration_spec.rb +28 -0
- data/spec/models/spotlight/resources/purl_spec.rb +115 -0
- data/spec/models/spotlight/resources/searchworks_spec.rb +91 -0
- data/spec/spec_helper.rb +60 -0
- data/spec/test_app_templates/catalog_controller.rb +96 -0
- data/spec/test_app_templates/gdor.yml +9 -0
- data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
- data/spec/unit/spotlight/dor/indexer_spec.rb +200 -0
- data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_a_doc_id.yml +1201 -0
- data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_exhibit-specific_indexing.yml +1003 -0
- data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_spotlight_data.yml +1003 -0
- data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_the_gdor_data.yml +1003 -0
- data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/no_donor_tags_ssim_field_in_solr_doc_when_note_displayLabel_Donor_tags_not_in_MODS.yml +1382 -0
- data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/solr_doc_has_donor_tags_ssim_field_when_note_displayLabel_Donor_tags_is_in_MODS.yml +1602 -0
- data/spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml +6822 -0
- data/spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml +1390 -0
- data/spotlight-dor-resources.gemspec +37 -0
- metadata +336 -0
@@ -0,0 +1,14 @@
|
|
1
|
+
module Spotlight::Resources
|
2
|
+
class Purl < Spotlight::Resources::DorResource
|
3
|
+
self.weight = -1000
|
4
|
+
|
5
|
+
# Whether this provider can handle the given resource.
#
# The host is matched literally (dots escaped), the pattern is anchored to the
# start of the whole string (\A, not the per-line ^), and the host must be
# followed by a URL delimiter or the end of the string, so look-alike hosts
# such as "purl.stanford.edu.example.org" are rejected.
#
# @param res [#url] object exposing the candidate URL (a nil url yields false)
# @return [Boolean] true only for http(s) URLs on purl.stanford.edu
def self.can_provide?(res)
  !!(res.url =~ %r{\Ahttps?://purl\.stanford\.edu(?:[/?#:]|\z)})
end
|
8
|
+
|
9
|
+
# Extracts the identifier (first path segment) from a purl.stanford.edu URL.
#
# The identifier stops at the first "/", ".", "#" or "?" so that format
# suffixes, fragments and query strings are not included.
#
# @return [String] the identifier taken from +url+
# @raise [NoMethodError] when +url+ is not a purl.stanford.edu URL with a
#   path segment (no match); same failure mode as before this change
def doc_id
  url.match(%r{\Ahttps?://purl\.stanford\.edu/([^#/.?]+)})[1]
end
|
12
|
+
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Spotlight::Resources
|
2
|
+
class Searchworks < Spotlight::Resources::DorResource
|
3
|
+
|
4
|
+
self.weight = -1000
|
5
|
+
|
6
|
+
# Whether this provider can handle the given resource.
#
# Accepts any host of the form searchworks<suffix>.stanford.edu where the
# suffix contains neither "." nor "/" (e.g. "searchworks-test"). Dots are
# matched literally, the pattern is anchored to the start of the whole string,
# and the host must be followed by a URL delimiter or the end of the string.
#
# @param res [#url] object exposing the candidate URL (a nil url yields false)
# @return [Boolean] true only for http(s) URLs on a searchworks*.stanford.edu host
def self.can_provide?(res)
  !!(res.url =~ %r{\Ahttps?://searchworks[^./]*\.stanford\.edu(?:[/?#:]|\z)})
end
|
9
|
+
|
10
|
+
# Extracts the record identifier that follows "view/" in a SearchWorks URL.
#
# The identifier stops at the first "/", ".", "#" or "?" so that format
# suffixes, fragments and query strings are not included.
#
# @return [String] the identifier taken from +url+
# @raise [NoMethodError] when +url+ is not a SearchWorks "view" URL (no
#   match); same failure mode as before this change
def doc_id
  url.match(%r{\Ahttps?://searchworks[^./]*\.stanford\.edu/.*view/([^/.#?]+)})[1]
end
|
13
|
+
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,160 @@
|
|
1
|
+
# external gems
|
2
|
+
require 'gdor/indexer'
|
3
|
+
require 'solrizer'
|
4
|
+
# Base class to harvest from DOR via harvestdor gem
|
5
|
+
module Spotlight::Dor
|
6
|
+
class Indexer < GDor::Indexer
|
7
|
+
# add contentMetadata fields
#
# For every jp2 file listed in the object's contentMetadata this hook adds the
# IIIF info URL and the square/thumb/large/full image URLs; the first jp2 also
# supplies the "first image" file name, width and height.
before_index do |sdb, solr_doc|
  Solrizer.insert_field(solr_doc, 'content_metadata_type', sdb.public_xml.xpath("/publicObject/contentMetadata/@type").text, :symbol, :displayable)

  # solr key used to detect whether a first image has already been recorded
  first_image_key = Solrizer.solr_name('content_metadata_first_image_file_name', :displayable)

  sdb.public_xml.xpath("/publicObject/contentMetadata").xpath('resource/file[@mimetype="image/jp2"]').each do |node|
    image_id = node.attr('id')
    # strip only a trailing ".jp2" extension; a ".jp2" elsewhere in the name is kept
    file_id = image_id.sub(/\.jp2\z/, '')

    if image_id =~ /jp2$/ && !solr_doc[first_image_key]
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', node.xpath('./imageData/@width').text, :displayable)
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', node.xpath('./imageData/@height').text, :displayable)
    end

    Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "https://stacks.stanford.edu/image/iiif/#{solr_doc[:id]}%2F#{file_id}/info.json", :displayable)
    Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_square", :displayable)
    Solrizer.insert_field(solr_doc, 'thumbnail_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_thumb", :displayable)
    Solrizer.insert_field(solr_doc, 'large_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_large", :displayable)
    Solrizer.insert_field(solr_doc, 'full_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_full", :displayable)
  end
end
|
27
|
+
|
28
|
+
# tweak author_sort field from stanford-mods:
# when an author_sort value is present, every U+FFFF in it is replaced with U+FFFD
before_index do |_sdb, solr_doc|
  author_sort = solr_doc[:author_sort]
  solr_doc[:author_sort] = author_sort.gsub("\uFFFF", "\uFFFD") if author_sort
end
|
32
|
+
|
33
|
+
# add fields from raw mods
# see comment with add_donor_tags about Feigenbaum specific donor tags data
# (hooks are registered in the order listed)
[
  :add_box,
  :add_donor_tags,
  :add_genre,
  :add_folder,
  :add_series,
  :mods_cartographics_indexing
].each do |hook_name|
  before_index hook_name
end
|
41
|
+
|
42
|
+
# Reader for the @solr_client instance variable.
#
# @return [Object, nil] whatever is stored in @solr_client (nil when unset)
def solr_client
  @solr_client
end
|
45
|
+
|
46
|
+
# Builds the solr document via the superclass, then runs the :before_index
# hooks against it.
#
# @param resource the resource being indexed (passed through to super and to the hooks)
# @return the document produced by super, after the hooks have run on it
def solr_document(resource)
  super.tap { |doc_hash| run_hook(:before_index, resource, doc_hash) }
end
|
51
|
+
|
52
|
+
# Wraps a druid in a Harvestdor::Indexer::Resource bound to this indexer's
# harvestdor object.
#
# @param druid the identifier handed to the resource constructor
# @return [Harvestdor::Indexer::Resource]
def resource(druid)
  Harvestdor::Indexer::Resource.new(harvestdor, druid)
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# add the box number to solr_doc as box_ssi field (note: single valued!)
#   data in location/physicalLocation or in relatedItem/location/physicalLocation
# TODO:  push this up to stanford-mods gem?  or should it be hierarchical series/box/folder?
#
# The first physicalLocation that contains a box number wins; locations
# without one are skipped, and box_ssi is left untouched when none match.
def add_box(sdb, solr_doc)
  # see spec for data from actual collections
  #   _location.physicalLocation should find top level and relatedItem
  box_nums = sdb.smods_rec._location.physicalLocation.map do |node|
    # note that this will also find Flatbox or Flat-box
    # NOTE(review): [^,|(Folder)] is a character class, so the capture stops at
    #   any of the characters , | ( ) f o l d e r (case-insensitive), not at the
    #   word "Folder" -- left unchanged here; confirm against the spec data
    match_data = node.text.match(/Box ?:? ?([^,|(Folder)]+)/i)
    match_data[1].strip if match_data
  end

  # drop the nils produced by non-matching locations before picking the first
  box_num = box_nums.compact.first
  solr_doc['box_ssi'] = box_num if box_num
end
|
72
|
+
|
73
|
+
# The donor_tags_ssim field was added in October 2015 specifically for the Feigenbaum exhibit.  It is very likely
#  it will go unused by other projects, but should be benign (since this field will not be created if this specific MODS note is not found.)
# Later refactoring could include project specific fields.   Peter Mangiafico
def add_donor_tags(sdb, solr_doc)
  donor_notes = sdb.smods_rec.note.select { |note| note.displayLabel == 'Donor tags' }
  # :symbol makes this a _ssim field
  insert_field(solr_doc, 'donor_tags', donor_notes.map(&:content), :symbol)
end
|
80
|
+
|
81
|
+
# add the folder number to solr_doc as folder_ssi field (note: single valued!)
#   data in location/physicalLocation or in relatedItem/location/physicalLocation
# TODO:  push this up to stanford-mods gem?  or should it be hierarchical series/box/folder?
#
# The first physicalLocation that contains a folder wins; locations without
# one are skipped, and folder_ssi is left untouched when none match.
def add_folder(sdb, solr_doc)
  # see spec for data from actual collections
  #   _location.physicalLocation should find top level and relatedItem
  folder_nums = sdb.smods_rec._location.physicalLocation.map do |node|
    # folder may be text with commas
    match_data = node.text.match(/Folder:? ?(.+)/i)
    next unless match_data

    result = match_data[1].strip
    # Menuez collection may have folder followed by Sleeve then Frame
    sleeve_match = result.match(/(.*),? ?Sleeve/i)
    # keep only the text before "Sleeve", minus any trailing comma
    sleeve_match ? sleeve_match[1].strip.sub(/,$/, '') : result
  end

  # drop the nils produced by non-matching locations before picking the first
  folder_num = folder_nums.compact.first
  solr_doc['folder_ssi'] = folder_num if folder_num
end
|
103
|
+
|
104
|
+
# add plain MODS <genre> element data, not the SearchWorks genre values
def add_genre(sdb, solr_doc)
  genre_values = sdb.smods_rec.genre.content
  # :symbol makes this a _ssim field
  insert_field(solr_doc, 'genre', genre_values, :symbol)
end
|
108
|
+
|
109
|
+
# add the series/accession 'number' to solr_doc as series_ssi field (note: single valued!)
#   data in location/physicalLocation or in relatedItem/location/physicalLocation
# TODO:  push this up to stanford-mods gem?  or should it be hierarchical series/box/folder?
#
# The first physicalLocation that contains a series/accession wins; locations
# without one are skipped, and series_ssi is left untouched when none match.
def add_series(sdb, solr_doc)
  # see spec for data from actual collections
  #   _location.physicalLocation should find top level and relatedItem
  series_nums = sdb.smods_rec._location.physicalLocation.map do |node|
    # feigenbaum uses 'Accession'
    match_data = node.text.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
    match_data[1].strip if match_data
  end

  # drop the nils produced by non-matching locations before picking the first
  series_num = series_nums.compact.first
  solr_doc['series_ssi'] = series_num if series_num
end
|
123
|
+
|
124
|
+
# Indexes MODS subject/cartographics/coordinates.
#
# Every coordinates text is stored via insert_field under "coordinates".
# In addition, each value wrapped in parentheses and shaped like
# "(<lng>--<lng>/<lat>--<lat>)" is converted with coord_to_decimal and appended
# to solr_doc["point_bbox"] as "minX minY maxX maxY". Parenthesised values
# without a "/" separator are skipped rather than raising.
def mods_cartographics_indexing(sdb, solr_doc)
  coordinates = Array(sdb.smods_rec.subject.cartographics.coordinates)
  insert_field(solr_doc, "coordinates", coordinates.map(&:text), :stored_searchable)

  coordinates.each do |node|
    text = node.text
    next unless text =~ /^\(/ && text =~ /\)$/

    lng, lat = text.gsub(/[\(\)]/, '').split('/')
    # need both halves to build a bounding box
    next unless lng && lat

    # longitudes are listed min--max; latitudes are listed max--min
    min_x, max_x = lng.split('--').map { |point| coord_to_decimal(point) }
    max_y, min_y = lat.split('--').map { |point| coord_to_decimal(point) }

    solr_doc["point_bbox"] ||= []
    solr_doc["point_bbox"] << "#{min_x} #{min_y} #{max_x} #{max_y}"
  end
end
|
141
|
+
|
142
|
+
# Converts a coordinate such as "W 123°23ʹ" to signed decimal degrees.
#
# Only a direction letter, whole degrees and an optional whole-minutes part
# (the number before ʹ) are read; anything after that (e.g. seconds) is ignored.
# West and South are returned as negative values.
#
# @param point [String] coordinate text
# @return [Float] decimal degrees
# @raise [ArgumentError] when no direction/degrees pattern is found in +point+
def coord_to_decimal(point)
  match = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<min>\d+)ʹ)?/.match(point)
  raise ArgumentError, "unparseable coordinate: #{point.inspect}" unless match

  # a missing minutes part is nil, and nil.to_f is 0.0
  decimal = match['deg'].to_i + match['min'].to_f / 60
  decimal = -1 * decimal if match['dir'] == "W" || match['dir'] == "S"
  decimal
end
|
153
|
+
|
154
|
+
# Calls Solrizer.insert_field once per value.
#
# @param solr_doc the document being built
# @param field [String] base field name
# @param values a single value, a collection of values, or nil (nil inserts nothing)
# @param args extra arguments forwarded unchanged to Solrizer.insert_field
# @return [Array] the values that were iterated
def insert_field(solr_doc, field, values, *args)
  Array(values).each { |value| Solrizer.insert_field(solr_doc, field, value, *args) }
end
|
159
|
+
end
|
160
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require "harvestdor-indexer"
|
2
|
+
require "spotlight/dor/resources/version"
|
3
|
+
|
4
|
+
module Spotlight
  module Dor
    # Namespace for the DOR resource integration; loads the indexer and the engine.
    module Resources
      require "spotlight/dor/indexer"
      require "spotlight/dor/resources/engine"

      class << self
        # Memoized Spotlight::Dor::Indexer built from <Rails.root>/config/gdor.yml
        # and the Blacklight solr configuration.
        #
        # @return [Spotlight::Dor::Indexer]
        def indexer
          @indexer ||= begin
            gdor_config = File.join(Rails.root, "config", "gdor.yml")
            Spotlight::Dor::Indexer.new(gdor_config, solr: Blacklight.solr_config)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'spotlight/engine'
|
2
|
+
require 'spotlight/dor/resources'
|
3
|
+
|
4
|
+
module Spotlight::Dor::Resources
  # Rails engine that registers the Searchworks and Purl resource providers
  # with Spotlight and sets this engine's parallel_options.
  class Engine < ::Rails::Engine
    initializer "spotlight.dor.initialize" do
      [Spotlight::Resources::Searchworks, Spotlight::Resources::Purl].each do |provider|
        Spotlight::Engine.config.resource_providers << provider
      end

      Spotlight::Dor::Resources::Engine.config.parallel_options = { in_threads: 1 }
    end
  end
end
|
@@ -0,0 +1,346 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<schema name="Hydra" version="1.5">
|
3
|
+
<!-- NOTE: various comments and unused configuration possibilities have been purged
|
4
|
+
from this file. Please refer to http://wiki.apache.org/solr/SchemaXml,
|
5
|
+
as well as the default schema file included with Solr -->
|
6
|
+
|
7
|
+
<uniqueKey>id</uniqueKey>
|
8
|
+
|
9
|
+
<fields>
|
10
|
+
<field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
|
11
|
+
<field name="_version_" type="long" indexed="true" stored="true"/>
|
12
|
+
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
13
|
+
|
14
|
+
<field name="lat" type="tdouble" stored="true" indexed="true" multiValued="false"/>
|
15
|
+
<field name="lng" type="tdouble" stored="true" indexed="true" multiValued="false"/>
|
16
|
+
<field name="original_pid_tesim" type="pid_text" stored="true" indexed="true" multiValued="true"/>
|
17
|
+
|
18
|
+
<field name="full_title_ng" type="text_en_ng" stored="false" indexed="true" multiValued="true"/>
|
19
|
+
<field name="id_ng" type="text_en_ng" stored="false" indexed="true" multiValued="false"/>
|
20
|
+
|
21
|
+
<!-- NOTE: not all possible Solr field types are represented in the dynamic fields -->
|
22
|
+
|
23
|
+
<!-- text (_t...) -->
|
24
|
+
<!--
|
25
|
+
<dynamicField name="*_ti" type="text" stored="false" indexed="true" multiValued="false"/>
|
26
|
+
<dynamicField name="*_tim" type="text" stored="false" indexed="true" multiValued="true"/>
|
27
|
+
-->
|
28
|
+
<dynamicField name="*_ts" type="text" stored="true" indexed="false" multiValued="false"/>
|
29
|
+
<dynamicField name="*_tsm" type="text" stored="true" indexed="false" multiValued="true"/>
|
30
|
+
<dynamicField name="*_tsi" type="text" stored="true" indexed="true" multiValued="false"/>
|
31
|
+
<dynamicField name="*_tsim" type="text" stored="true" indexed="true" multiValued="true"/>
|
32
|
+
<!--
|
33
|
+
<dynamicField name="*_tiv" type="text" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
|
34
|
+
<dynamicField name="*_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
35
|
+
-->
|
36
|
+
<dynamicField name="*_tsiv" type="text" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
|
37
|
+
<dynamicField name="*_tsimv" type="text" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
38
|
+
|
39
|
+
<!-- English text (_te...) -->
|
40
|
+
<!--
|
41
|
+
<dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
|
42
|
+
<dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
|
43
|
+
-->
|
44
|
+
<dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
|
45
|
+
<dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
|
46
|
+
<dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
|
47
|
+
<dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
|
48
|
+
<!--
|
49
|
+
<dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
|
50
|
+
<dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
51
|
+
-->
|
52
|
+
<dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
|
53
|
+
<dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
54
|
+
|
55
|
+
<!-- string (_s...) -->
|
56
|
+
<!--
|
57
|
+
<dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
|
58
|
+
<dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
|
59
|
+
-->
|
60
|
+
<dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
|
61
|
+
<dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
|
62
|
+
<dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
|
63
|
+
<dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
|
64
|
+
<!--
|
65
|
+
<dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false"/>
|
66
|
+
-->
|
67
|
+
|
68
|
+
<!-- integer (_i...) -->
|
69
|
+
<!--
|
70
|
+
<dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
|
71
|
+
<dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
|
72
|
+
-->
|
73
|
+
<dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
|
74
|
+
<dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
|
75
|
+
<dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
|
76
|
+
<dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
|
77
|
+
|
78
|
+
<!-- trie integer (_it...) (for faster range queries) -->
|
79
|
+
<!--
|
80
|
+
<dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
|
81
|
+
<dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
|
82
|
+
-->
|
83
|
+
<dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
|
84
|
+
<dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
|
85
|
+
<dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
|
86
|
+
<dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
|
87
|
+
|
88
|
+
<!-- date (_dt...) -->
|
89
|
+
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z
|
90
|
+
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
|
91
|
+
<!--
|
92
|
+
<dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
|
93
|
+
<dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
|
94
|
+
-->
|
95
|
+
<dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
|
96
|
+
<dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
|
97
|
+
<dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
|
98
|
+
<dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
|
99
|
+
|
100
|
+
<!-- trie date (_dtt...) (for faster range queries) -->
|
101
|
+
<!--
|
102
|
+
<dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
|
103
|
+
<dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
|
104
|
+
-->
|
105
|
+
<dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
|
106
|
+
<dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
|
107
|
+
<dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
|
108
|
+
<dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
|
109
|
+
|
110
|
+
<!-- long (_l...) -->
|
111
|
+
<!--
|
112
|
+
<dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
|
113
|
+
<dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
|
114
|
+
-->
|
115
|
+
<dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
|
116
|
+
<dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
|
117
|
+
<dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
|
118
|
+
<dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
|
119
|
+
|
120
|
+
<!-- trie long (_lt...) (for faster range queries) -->
|
121
|
+
<!--
|
122
|
+
<dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
|
123
|
+
<dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
|
124
|
+
-->
|
125
|
+
<dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
|
126
|
+
<dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
|
127
|
+
<dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
|
128
|
+
<dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
|
129
|
+
|
130
|
+
<!-- double (_db...) -->
|
131
|
+
<!--
|
132
|
+
<dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
|
133
|
+
<dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
|
134
|
+
-->
|
135
|
+
<dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
|
136
|
+
<dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
|
137
|
+
<dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
|
138
|
+
<dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
|
139
|
+
|
140
|
+
<!-- trie double (_dbt...) (for faster range queries) -->
|
141
|
+
<!--
|
142
|
+
<dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
|
143
|
+
<dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
|
144
|
+
-->
|
145
|
+
<dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
|
146
|
+
<dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
|
147
|
+
<dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
|
148
|
+
<dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
|
149
|
+
|
150
|
+
<!-- float (_f...) -->
|
151
|
+
<!--
|
152
|
+
<dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
|
153
|
+
<dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
|
154
|
+
-->
|
155
|
+
<dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
|
156
|
+
<dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
|
157
|
+
<dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
|
158
|
+
<dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
|
159
|
+
|
160
|
+
<!-- trie float (_ft...) (for faster range queries) -->
|
161
|
+
<!--
|
162
|
+
<dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
|
163
|
+
<dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
|
164
|
+
-->
|
165
|
+
<dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
|
166
|
+
<dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
|
167
|
+
<dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
|
168
|
+
<dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
|
169
|
+
|
170
|
+
<!-- boolean (_b...) -->
|
171
|
+
<!--
|
172
|
+
<dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
|
173
|
+
-->
|
174
|
+
<dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
|
175
|
+
<dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
|
176
|
+
|
177
|
+
<!-- Type used to index the lat and lon components for the "location" FieldType -->
|
178
|
+
<!--
|
179
|
+
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
|
180
|
+
-->
|
181
|
+
|
182
|
+
<!-- location (_ll...) -->
|
183
|
+
<!--
|
184
|
+
<dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
|
185
|
+
<dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
|
186
|
+
-->
|
187
|
+
<dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
|
188
|
+
<dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
|
189
|
+
<dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
|
190
|
+
<dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
|
191
|
+
|
192
|
+
<!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
|
193
|
+
<field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
|
194
|
+
|
195
|
+
|
196
|
+
</fields>
|
197
|
+
|
198
|
+
|
199
|
+
<!-- Above, multiple source fields are copied to the [text] field.
|
200
|
+
Another way to map multiple source fields to the same
|
201
|
+
destination field is to use the dynamic field syntax.
|
202
|
+
copyField also supports a maxChars to copy setting. -->
|
203
|
+
|
204
|
+
<copyField source="id" dest="id_ng" maxChars="3000"/>
|
205
|
+
<copyField source="full_title_tesim" dest="full_title_ng" maxChars="3000"/>
|
206
|
+
<copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/>
|
207
|
+
|
208
|
+
<types>
|
209
|
+
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
|
210
|
+
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
211
|
+
<fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
|
212
|
+
|
213
|
+
<!-- Default numeric field types. -->
|
214
|
+
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
|
215
|
+
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
|
216
|
+
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
|
217
|
+
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
|
218
|
+
|
219
|
+
<!-- trie numeric field types for faster range queries -->
|
220
|
+
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
|
221
|
+
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
|
222
|
+
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
|
223
|
+
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
|
224
|
+
|
225
|
+
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z
|
226
|
+
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
227
|
+
-->
|
228
|
+
<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
|
229
|
+
<!-- A Trie based date field for faster date range queries and date faceting. -->
|
230
|
+
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
|
231
|
+
|
232
|
+
|
233
|
+
<!-- This point type indexes the coordinates as separate fields (subFields)
|
234
|
+
If subFieldType is defined, it references a type, and a dynamic field
|
235
|
+
definition is created matching *___<typename>. Alternately, if
|
236
|
+
subFieldSuffix is defined, that is used to create the subFields.
|
237
|
+
Example: if subFieldType="double", then the coordinates would be
|
238
|
+
indexed in fields myloc_0___double,myloc_1___double.
|
239
|
+
Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
240
|
+
in fields myloc_0_d,myloc_1_d
|
241
|
+
The subFields are an implementation detail of the fieldType, and end
|
242
|
+
users normally should not need to know about them.
|
243
|
+
-->
|
244
|
+
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
245
|
+
|
246
|
+
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
247
|
+
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
248
|
+
|
249
|
+
<!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
|
250
|
+
For more information about this and other Spatial fields new to Solr 4, see:
|
251
|
+
http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
|
252
|
+
-->
|
253
|
+
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
|
254
|
+
geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
|
255
|
+
|
256
|
+
<fieldType name="text" class="solr.TextField" omitNorms="false">
|
257
|
+
<analyzer>
|
258
|
+
<tokenizer class="solr.ICUTokenizerFactory"/>
|
259
|
+
<filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
|
260
|
+
<filter class="solr.TrimFilterFactory"/>
|
261
|
+
</analyzer>
|
262
|
+
</fieldType>
|
263
|
+
|
264
|
+
<!-- A text field that only splits on whitespace for exact matching of words -->
|
265
|
+
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
266
|
+
<analyzer>
|
267
|
+
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
268
|
+
<filter class="solr.TrimFilterFactory"/>
|
269
|
+
</analyzer>
|
270
|
+
</fieldType>
|
271
|
+
|
272
|
+
<!-- single token analyzed text, for sorting. Punctuation is significant. -->
|
273
|
+
<fieldtype name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
|
274
|
+
<analyzer>
|
275
|
+
<tokenizer class="solr.KeywordTokenizerFactory" />
|
276
|
+
<filter class="solr.ICUFoldingFilterFactory"/>
|
277
|
+
<filter class="solr.TrimFilterFactory" />
|
278
|
+
</analyzer>
|
279
|
+
</fieldtype>
|
280
|
+
|
281
|
+
<!-- A text field with defaults appropriate for English -->
|
282
|
+
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
|
283
|
+
<analyzer>
|
284
|
+
<tokenizer class="solr.ICUTokenizerFactory"/>
|
285
|
+
<filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
|
286
|
+
<filter class="solr.EnglishPossessiveFilterFactory"/>
|
287
|
+
<!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
|
288
|
+
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
289
|
+
<!--
|
290
|
+
<filter class="solr.PorterStemFilterFactory"/>
|
291
|
+
-->
|
292
|
+
<filter class="solr.TrimFilterFactory"/>
|
293
|
+
</analyzer>
|
294
|
+
</fieldType>
|
295
|
+
|
296
|
+
<!-- A text field with defaults appropriate for English and NGrams.
     Index-time analysis adds front edge n-grams (3 to 15 characters);
     query-time analysis uses the same chain without the n-gram filter. -->
<fieldType name="text_en_ng" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
    <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="15" side="front"/>
  </analyzer>

  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
  </analyzer>
</fieldType>
|
317
|
+
|
318
|
+
<fieldType name="pid_text" class="solr.TextField" positionIncrementGap="100">
|
319
|
+
<analyzer>
|
320
|
+
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
321
|
+
</analyzer>
|
322
|
+
</fieldType>
|
323
|
+
|
324
|
+
<!-- queries for paths match documents at that path, or in descendent paths -->
|
325
|
+
<fieldType name="descendent_path" class="solr.TextField">
|
326
|
+
<analyzer type="index">
|
327
|
+
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
328
|
+
</analyzer>
|
329
|
+
<analyzer type="query">
|
330
|
+
<tokenizer class="solr.KeywordTokenizerFactory" />
|
331
|
+
</analyzer>
|
332
|
+
</fieldType>
|
333
|
+
|
334
|
+
<!-- queries for paths match documents at that path, or in ancestor paths -->
|
335
|
+
<fieldType name="ancestor_path" class="solr.TextField">
|
336
|
+
<analyzer type="index">
|
337
|
+
<tokenizer class="solr.KeywordTokenizerFactory" />
|
338
|
+
</analyzer>
|
339
|
+
<analyzer type="query">
|
340
|
+
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
|
341
|
+
</analyzer>
|
342
|
+
</fieldType>
|
343
|
+
|
344
|
+
</types>
|
345
|
+
|
346
|
+
</schema>
|