spotlight-dor-resources 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.hound.yml +2 -0
  4. data/.rubocop.yml +8 -0
  5. data/.rubocop_todo.yml +191 -0
  6. data/.travis.yml +17 -0
  7. data/Gemfile +32 -0
  8. data/LICENSE.txt +13 -0
  9. data/README.md +65 -0
  10. data/Rakefile +48 -0
  11. data/app/models/spotlight/resources/dor_resource.rb +17 -0
  12. data/app/models/spotlight/resources/harvestdor.rb +4 -0
  13. data/app/models/spotlight/resources/purl.rb +14 -0
  14. data/app/models/spotlight/resources/searchworks.rb +15 -0
  15. data/lib/spotlight/dor/indexer.rb +160 -0
  16. data/lib/spotlight/dor/resources.rb +16 -0
  17. data/lib/spotlight/dor/resources/engine.rb +13 -0
  18. data/lib/spotlight/dor/resources/version.rb +7 -0
  19. data/solr_conf/conf/schema.xml +346 -0
  20. data/solr_conf/conf/solrconfig.xml +180 -0
  21. data/spec/integration/gdor_integration_spec.rb +30 -0
  22. data/spec/integration/indexer_integration_spec.rb +28 -0
  23. data/spec/models/spotlight/resources/purl_spec.rb +115 -0
  24. data/spec/models/spotlight/resources/searchworks_spec.rb +91 -0
  25. data/spec/spec_helper.rb +60 -0
  26. data/spec/test_app_templates/catalog_controller.rb +96 -0
  27. data/spec/test_app_templates/gdor.yml +9 -0
  28. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  29. data/spec/unit/spotlight/dor/indexer_spec.rb +200 -0
  30. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_a_doc_id.yml +1201 -0
  31. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_exhibit-specific_indexing.yml +1003 -0
  32. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_spotlight_data.yml +1003 -0
  33. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_the_gdor_data.yml +1003 -0
  34. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/no_donor_tags_ssim_field_in_solr_doc_when_note_displayLabel_Donor_tags_not_in_MODS.yml +1382 -0
  35. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/solr_doc_has_donor_tags_ssim_field_when_note_displayLabel_Donor_tags_is_in_MODS.yml +1602 -0
  36. data/spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml +6822 -0
  37. data/spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml +1390 -0
  38. data/spotlight-dor-resources.gemspec +37 -0
  39. metadata +336 -0
@@ -0,0 +1,14 @@
1
module Spotlight::Resources
  # Resource provider for items addressed by a Stanford PURL,
  # i.e. http(s)://purl.stanford.edu/<druid>.
  class Purl < Spotlight::Resources::DorResource
    # Matches a PURL URL. Capture group 1 is the first path segment (the
    # druid), stopping at '#', '/' or '.'; the group is optional so a bare
    # host URL still matches. The dots are escaped and the pattern is
    # anchored with \A so that look-alike hosts ("purlXstanford.edu") and
    # URLs hidden after a newline are not accepted.
    PURL_URL = %r{\Ahttps?://purl\.stanford\.edu(?:/([^#/.]+))?}

    # NOTE(review): presumably orders this provider relative to the other
    # registered resource providers -- confirm against Spotlight.
    self.weight = -1000

    # @param res [#url] candidate resource
    # @return [Boolean] true when the resource URL points at purl.stanford.edu
    def self.can_provide?(res)
      !!(res.url =~ PURL_URL)
    end

    # @return [String, nil] the druid taken from the PURL, or nil when the URL
    #   carries no druid segment (previously this raised NoMethodError on nil)
    def doc_id
      url[PURL_URL, 1]
    end
  end
end
@@ -0,0 +1,15 @@
1
module Spotlight::Resources
  # Resource provider for items addressed by a SearchWorks record URL,
  # e.g. http(s)://searchworks.stanford.edu/view/<id>.
  class Searchworks < Spotlight::Resources::DorResource
    # Any searchworks* host under stanford.edu. The dots are escaped and the
    # pattern is anchored with \A so that look-alike hosts and URLs hidden
    # after a newline are not accepted.
    SEARCHWORKS_URL = %r{\Ahttps?://searchworks[^.]*\.stanford\.edu}

    # Same host, followed by a ".../view/<id>" path; capture group 1 is the
    # id, stopping at '/', '.' or '#'.
    SEARCHWORKS_RECORD_URL = %r{\Ahttps?://searchworks[^.]*\.stanford\.edu/.*view/([^/.#]+)}

    # NOTE(review): presumably orders this provider relative to the other
    # registered resource providers -- confirm against Spotlight.
    self.weight = -1000

    # @param res [#url] candidate resource
    # @return [Boolean] true when the resource URL points at a SearchWorks host
    def self.can_provide?(res)
      !!(res.url =~ SEARCHWORKS_URL)
    end

    # @return [String, nil] the record id taken from the URL, or nil when the
    #   URL has no ".../view/<id>" part (previously raised NoMethodError on nil)
    def doc_id
      url[SEARCHWORKS_RECORD_URL, 1]
    end
  end
end
@@ -0,0 +1,160 @@
1
+ # external gems
2
+ require 'gdor/indexer'
3
+ require 'solrizer'
4
+ # Base class to harvest from DOR via harvestdor gem
5
+ module Spotlight::Dor
6
+ class Indexer < GDor::Indexer
7
+ # add contentMetadata fields
8
# Copies contentMetadata details from the public XML into the solr document:
# the contentMetadata type, the name and dimensions of the first JP2 image,
# and a set of stacks.stanford.edu image URLs for every JP2 file.
before_index do |sdb, solr_doc|
  Solrizer.insert_field(solr_doc, 'content_metadata_type', sdb.public_xml.xpath("/publicObject/contentMetadata/@type").text, :symbol, :displayable)

  # Loop invariants: the solr field that marks "first image already recorded",
  # the document id and the image service base URL.
  first_image_field = Solrizer.solr_name('content_metadata_first_image_file_name', :displayable)
  druid = solr_doc[:id]
  stacks_image_url = "https://stacks.stanford.edu/image"

  sdb.public_xml.xpath("/publicObject/contentMetadata").xpath('resource/file[@mimetype="image/jp2"]').each do |node|
    file_name = node.attr('id')
    # A file element without an id cannot be turned into an image URL.
    next if file_name.nil?

    # Strip only the trailing extension; the previous gsub removed ".jp2"
    # wherever it appeared inside the file name.
    file_id = file_name.sub(/\.jp2\z/, '')

    # Only the first JP2 encountered fills the "first image" fields.
    if file_name =~ /jp2$/ && !solr_doc[first_image_field]
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', node.xpath('./imageData/@width').text, :displayable)
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', node.xpath('./imageData/@height').text, :displayable)
    end

    Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "#{stacks_image_url}/iiif/#{druid}%2F#{file_id}/info.json", :displayable)
    Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "#{stacks_image_url}/#{druid}/#{file_id}_square", :displayable)
    Solrizer.insert_field(solr_doc, 'thumbnail_url', "#{stacks_image_url}/#{druid}/#{file_id}_thumb", :displayable)
    Solrizer.insert_field(solr_doc, 'large_image_url', "#{stacks_image_url}/#{druid}/#{file_id}_large", :displayable)
    Solrizer.insert_field(solr_doc, 'full_image_url', "#{stacks_image_url}/#{druid}/#{file_id}_full", :displayable)
  end
end
27
+
28
+ # tweak author_sort field from stanford-mods
29
# When an author_sort value is present, swap every U+FFFF character in it
# for U+FFFD; documents without author_sort are left untouched.
before_index do |_sdb, solr_doc|
  author_sort = solr_doc[:author_sort]
  solr_doc[:author_sort] = author_sort.tr("\uFFFF", "\uFFFD") if author_sort
end
32
+
33
+ # add fields from raw mods
34
+ # see comment with add_donor_tags about Feigenbaum specific donor tags data
35
# Register the MODS-derived field builders as before_index hooks, in this order.
%i[
  add_box
  add_donor_tags
  add_genre
  add_folder
  add_series
  mods_cartographics_indexing
].each { |hook| before_index hook }
41
+
42
# Reader for the @solr_client instance variable; nil when it was never set.
# NOTE(review): the variable is assigned outside this file -- presumably by
# the parent indexer; confirm.
def solr_client
  defined?(@solr_client) ? @solr_client : nil
end
45
+
46
# Builds the solr document via the parent implementation, then runs the
# :before_index hooks against it.
#
# @param resource the resource being indexed (passed through to super and to the hooks)
# @return the document hash produced by super, after the hooks have run
def solr_document(resource)
  super.tap { |doc_hash| run_hook(:before_index, resource, doc_hash) }
end
51
+
52
# Wraps a druid in a Harvestdor resource bound to this indexer's harvestdor.
#
# @param druid the identifier of the object to wrap
# @return [Harvestdor::Indexer::Resource]
def resource(druid)
  Harvestdor::Indexer::Resource.new(harvestdor, druid)
end
55
+
56
+ private
57
+
58
+ # add the box number to solr_doc as box_ssi field (note: single valued!)
59
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
60
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
61
# Sets solr_doc['box_ssi'] (single valued) to the first box number found in
# any physicalLocation text of the MODS record; the key is left untouched
# when no location mentions a box.
#
# @param sdb object exposing smods_rec (the MODS record)
# @param solr_doc [Hash] solr document being built; modified in place
def add_box(sdb, solr_doc)
  # see spec for data from actual collections
  # _location.physicalLocation should find top level and relatedItem
  box_numbers = sdb.smods_rec._location.physicalLocation.map do |node|
    # note that this will also find Flatbox or Flat-box
    # NOTE(review): [^,|(Folder)] is a character class, not an alternation,
    # so the capture also stops at any of the letters f/o/l/d/e/r -- confirm
    # that is acceptable for the collections being indexed.
    match_data = node.text.match(/Box ?:? ?([^,|(Folder)]+)/i)
    match_data[1].strip if match_data
  end

  # Locations without a box produce nil entries; skip them so that a match in
  # a later location is not hidden behind a leading nil.
  box = box_numbers.compact.first
  solr_doc['box_ssi'] = box if box
end
72
+
73
# The donor_tags_ssim field was added in October 2015 specifically for the
# Feigenbaum exhibit. It will very likely go unused by other projects, but it
# should be benign: the field is not created when this specific MODS note is
# absent. Later refactoring could introduce project-specific fields.
# -- Peter Mangiafico
def add_donor_tags(sdb, solr_doc)
  tag_notes = sdb.smods_rec.note.select { |note| note.displayLabel == 'Donor tags' }
  # :symbol makes this a _ssim field
  insert_field(solr_doc, 'donor_tags', tag_notes.map(&:content), :symbol)
end
80
+
81
+ # add the folder number to solr_doc as folder_ssi field (note: single valued!)
82
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
83
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
84
# Sets solr_doc['folder_ssi'] (single valued) to the first folder value found
# in any physicalLocation text of the MODS record; the key is left untouched
# when no location mentions a folder.
#
# @param sdb object exposing smods_rec (the MODS record)
# @param solr_doc [Hash] solr document being built; modified in place
def add_folder(sdb, solr_doc)
  # see spec for data from actual collections
  # _location.physicalLocation should find top level and relatedItem
  folder_values = sdb.smods_rec._location.physicalLocation.map do |node|
    # folder may be text with commas
    match_data = node.text.match(/Folder:? ?(.+)/i)
    next unless match_data

    folder_text = match_data[1].strip
    # Menuez collection may have folder followed by Sleeve then Frame;
    # keep only the part before "Sleeve" and drop a trailing comma.
    sleeve_match = folder_text.match(/(.*),? ?Sleeve/i)
    sleeve_match ? sleeve_match[1].strip.sub(/,$/, '') : folder_text
  end

  # Locations without a folder produce nil entries; skip them so that a match
  # in a later location is not hidden behind a leading nil.
  folder = folder_values.compact.first
  solr_doc['folder_ssi'] = folder if folder
end
103
+
104
+ # add plain MODS <genre> element data, not the SearchWorks genre values
105
# Indexes the content of the plain MODS <genre> elements (not the SearchWorks
# genre values) under 'genre'.
def add_genre(sdb, solr_doc)
  genre_values = sdb.smods_rec.genre.content
  # :symbol makes this a _ssim field
  insert_field(solr_doc, 'genre', genre_values, :symbol)
end
108
+
109
+ # add the series/accession 'number' to solr_doc as series_ssi field (note: single valued!)
110
+ # data in location/physicalLocation or in relatedItem/location/physicalLocation
111
+ # TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
112
# Sets solr_doc['series_ssi'] (single valued) to the first series/accession
# 'number' found in any physicalLocation text of the MODS record; the key is
# left untouched when no location mentions a series or accession.
#
# @param sdb object exposing smods_rec (the MODS record)
# @param solr_doc [Hash] solr document being built; modified in place
def add_series(sdb, solr_doc)
  # see spec for data from actual collections
  # _location.physicalLocation should find top level and relatedItem
  series_values = sdb.smods_rec._location.physicalLocation.map do |node|
    # feigenbaum uses 'Accession'
    match_data = node.text.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
    match_data[1].strip if match_data
  end

  # Locations without a series produce nil entries; skip them so that a match
  # in a later location is not hidden behind a leading nil.
  series = series_values.compact.first
  solr_doc['series_ssi'] = series if series
end
123
+
124
# Indexes MODS subject/cartographics/coordinates:
#   * every coordinates string goes into the 'coordinates' field;
#   * each parenthesised "(west--east/north--south)" string is additionally
#     converted to decimal degrees and appended to solr_doc['point_bbox'] as
#     "minX minY maxX maxY".
# Strings that are not parenthesised, have no '/' separator, or lack a
# '--' range on either side are skipped for point_bbox.
#
# @param sdb object exposing smods_rec (the MODS record)
# @param solr_doc [Hash] solr document being built; modified in place
def mods_cartographics_indexing(sdb, solr_doc)
  coordinates = Array(sdb.smods_rec.subject.cartographics.coordinates).map(&:text)
  insert_field(solr_doc, 'coordinates', coordinates, :stored_searchable)

  coordinates.each do |text|
    next unless text =~ /^\(/ && text =~ /\)$/

    lng, lat = text.gsub(/[\(\)]/, '').split('/')
    # Without both halves there is no bounding box to compute.
    next if lng.nil? || lat.nil?

    # Longitude is listed west--east, latitude north--south.
    min_x, max_x = lng.split('--').map { |point| coord_to_decimal(point) }
    max_y, min_y = lat.split('--').map { |point| coord_to_decimal(point) }
    # A side without a '--' range would leave a corner empty.
    next unless [min_x, min_y, max_x, max_y].all?

    solr_doc['point_bbox'] ||= []
    solr_doc['point_bbox'] << "#{min_x} #{min_y} #{max_x} #{max_y}"
  end
end
141
+
142
# Converts one coordinate such as "W 123°30ʹ" or "N 38°17ʹ53ʺ" to signed
# decimal degrees. Minutes (ʹ) and seconds (ʺ) are optional; west and south
# are negative.
#
# @param point [String] hemisphere letter (N/E/S/W), degrees, optional minutes and seconds
# @return [Float] decimal degrees
# @raise [ArgumentError] when no hemisphere/degree pair can be found in point
def coord_to_decimal(point)
  regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<min>\d+)ʹ)?(?:(?<sec>\d+)ʺ)?/
  match = regex.match(point)
  raise ArgumentError, "unparseable coordinate: #{point.inspect}" unless match

  # Absent minutes/seconds are nil captures, and nil.to_f is 0.0.
  decimal = match['deg'].to_i + match['min'].to_f / 60 + match['sec'].to_f / 3600
  %w[W S].include?(match['dir']) ? -decimal : decimal
end
153
+
154
# Inserts each value individually through Solrizer.insert_field; accepts a
# single value, an array of values, or nil (which inserts nothing).
#
# @param solr_doc [Hash] solr document being built
# @param field [String] base field name handed to Solrizer
# @param values a value, a collection of values, or nil
# @param args indexer arguments forwarded unchanged to Solrizer.insert_field
def insert_field(solr_doc, field, values, *args)
  Array(values).each { |value| Solrizer.insert_field(solr_doc, field, value, *args) }
end
159
+ end
160
+ end
@@ -0,0 +1,16 @@
1
+ require "harvestdor-indexer"
2
+ require "spotlight/dor/resources/version"
3
+
4
module Spotlight
  module Dor
    # Entry point of the gem: loads the indexer and the Rails engine and
    # exposes a memoized indexer.
    module Resources
      require "spotlight/dor/indexer"
      require "spotlight/dor/resources/engine"

      class << self
        # @return [Spotlight::Dor::Indexer] indexer built once from the host
        #   application's config/gdor.yml and Blacklight's solr configuration,
        #   then reused on later calls
        def indexer
          @indexer ||= begin
            config_path = File.join(Rails.root, "config", "gdor.yml")
            Spotlight::Dor::Indexer.new(config_path, solr: Blacklight.solr_config)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'spotlight/engine'
2
+ require 'spotlight/dor/resources'
3
+
4
module Spotlight::Dor::Resources
  # Rails engine that plugs the DOR resource providers into Spotlight.
  class Engine < ::Rails::Engine
    initializer "spotlight.dor.initialize" do
      # Register the SearchWorks provider first, then the PURL provider.
      providers = Spotlight::Engine.config.resource_providers
      [Spotlight::Resources::Searchworks, Spotlight::Resources::Purl].each do |provider|
        providers << provider
      end

      # Default parallel options: a single thread.
      Spotlight::Dor::Resources::Engine.config.parallel_options = { in_threads: 1 }
    end
  end
end
@@ -0,0 +1,7 @@
1
module Spotlight
  module Dor
    module Resources
      # Gem version; frozen so the shared string cannot be mutated in place.
      VERSION = "0.0.1".freeze
    end
  end
end
@@ -0,0 +1,346 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <schema name="Hydra" version="1.5">
3
+ <!-- NOTE: various comments and unused configuration possibilities have been purged
4
+ from this file. Please refer to http://wiki.apache.org/solr/SchemaXml,
5
+ as well as the default schema file included with Solr -->
6
+
7
+ <uniqueKey>id</uniqueKey>
8
+
9
+ <fields>
10
+ <field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
11
+ <field name="_version_" type="long" indexed="true" stored="true"/>
12
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
13
+
14
+ <field name="lat" type="tdouble" stored="true" indexed="true" multiValued="false"/>
15
+ <field name="lng" type="tdouble" stored="true" indexed="true" multiValued="false"/>
16
+ <field name="original_pid_tesim" type="pid_text" stored="true" indexed="true" multiValued="true"/>
17
+
18
+ <field name="full_title_ng" type="text_en_ng" stored="false" indexed="true" multiValued="true"/>
19
+ <field name="id_ng" type="text_en_ng" stored="false" indexed="true" multiValued="false"/>
20
+
21
+ <!-- NOTE: not all possible Solr field types are represented in the dynamic fields -->
22
+
23
+ <!-- text (_t...) -->
24
+ <!--
25
+ <dynamicField name="*_ti" type="text" stored="false" indexed="true" multiValued="false"/>
26
+ <dynamicField name="*_tim" type="text" stored="false" indexed="true" multiValued="true"/>
27
+ -->
28
+ <dynamicField name="*_ts" type="text" stored="true" indexed="false" multiValued="false"/>
29
+ <dynamicField name="*_tsm" type="text" stored="true" indexed="false" multiValued="true"/>
30
+ <dynamicField name="*_tsi" type="text" stored="true" indexed="true" multiValued="false"/>
31
+ <dynamicField name="*_tsim" type="text" stored="true" indexed="true" multiValued="true"/>
32
+ <!--
33
+ <dynamicField name="*_tiv" type="text" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
34
+ <dynamicField name="*_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
35
+ -->
36
+ <dynamicField name="*_tsiv" type="text" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
37
+ <dynamicField name="*_tsimv" type="text" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
38
+
39
+ <!-- English text (_te...) -->
40
+ <!--
41
+ <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
42
+ <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
43
+ -->
44
+ <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
45
+ <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
46
+ <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
47
+ <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
48
+ <!--
49
+ <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
50
+ <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
51
+ -->
52
+ <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
53
+ <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
54
+
55
+ <!-- string (_s...) -->
56
+ <!--
57
+ <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
58
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
59
+ -->
60
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
61
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
62
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
63
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
64
+ <!--
65
+ <dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false"/>
66
+ -->
67
+
68
+ <!-- integer (_i...) -->
69
+ <!--
70
+ <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
71
+ <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
72
+ -->
73
+ <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
74
+ <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
75
+ <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
76
+ <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
77
+
78
+ <!-- trie integer (_it...) (for faster range queries) -->
79
+ <!--
80
+ <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
81
+ <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
82
+ -->
83
+ <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
84
+ <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
85
+ <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
86
+ <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
87
+
88
+ <!-- date (_dt...) -->
89
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
90
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
91
+ <!--
92
+ <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
93
+ <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
94
+ -->
95
+ <dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
96
+ <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
97
+ <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
98
+ <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
99
+
100
+ <!-- trie date (_dtt...) (for faster range queries) -->
101
+ <!--
102
+ <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
103
+ <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
104
+ -->
105
+ <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
106
+ <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
107
+ <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
108
+ <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
109
+
110
+ <!-- long (_l...) -->
111
+ <!--
112
+ <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
113
+ <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
114
+ -->
115
+ <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
116
+ <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
117
+ <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
118
+ <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
119
+
120
+ <!-- trie long (_lt...) (for faster range queries) -->
121
+ <!--
122
+ <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
123
+ <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
124
+ -->
125
+ <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
126
+ <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
127
+ <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
128
+ <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
129
+
130
+ <!-- double (_db...) -->
131
+ <!--
132
+ <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
133
+ <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
134
+ -->
135
+ <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
136
+ <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
137
+ <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
138
+ <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
139
+
140
+ <!-- trie double (_dbt...) (for faster range queries) -->
141
+ <!--
142
+ <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
143
+ <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
144
+ -->
145
+ <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
146
+ <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
147
+ <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
148
+ <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
149
+
150
+ <!-- float (_f...) -->
151
+ <!--
152
+ <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
153
+ <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
154
+ -->
155
+ <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
156
+ <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
157
+ <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
158
+ <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
159
+
160
+ <!-- trie float (_ft...) (for faster range queries) -->
161
+ <!--
162
+ <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
163
+ <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
164
+ -->
165
+ <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
166
+ <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
167
+ <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
168
+ <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
169
+
170
+ <!-- boolean (_b...) -->
171
+ <!--
172
+ <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
173
+ -->
174
+ <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
175
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
176
+
177
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
178
+ <!--
179
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
180
+ -->
181
+
182
+ <!-- location (_ll...) -->
183
+ <!--
184
+ <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
185
+ <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
186
+ -->
187
+ <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
188
+ <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
189
+ <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
190
+ <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
191
+
192
+ <!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
193
+ <field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
194
+
195
+
196
+ </fields>
197
+
198
+
199
+ <!-- Above, multiple source fields are copied to the [text] field.
200
+ Another way to map multiple source fields to the same
201
+ destination field is to use the dynamic field syntax.
202
+ copyField also supports a maxChars to copy setting. -->
203
+
204
+ <copyField source="id" dest="id_ng" maxChars="3000"/>
205
+ <copyField source="full_title_tesim" dest="full_title_ng" maxChars="3000"/>
206
+ <copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/>
207
+
208
+ <types>
209
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
210
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
211
+ <fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
212
+
213
+ <!-- Default numeric field types. -->
214
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
215
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
216
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
217
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
218
+
219
+ <!-- trie numeric field types for faster range queries -->
220
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
221
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
222
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
223
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
224
+
225
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
226
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
227
+ -->
228
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
229
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
230
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
231
+
232
+
233
+ <!-- This point type indexes the coordinates as separate fields (subFields)
234
+ If subFieldType is defined, it references a type, and a dynamic field
235
+ definition is created matching *___<typename>. Alternately, if
236
+ subFieldSuffix is defined, that is used to create the subFields.
237
+ Example: if subFieldType="double", then the coordinates would be
238
+ indexed in fields myloc_0___double,myloc_1___double.
239
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
240
+ in fields myloc_0_d,myloc_1_d
241
+ The subFields are an implementation detail of the fieldType, and end
242
+ users normally should not need to know about them.
243
+ -->
244
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
245
+
246
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
247
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
248
+
249
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
250
+ For more information about this and other Spatial fields new to Solr 4, see:
251
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
252
+ -->
253
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
254
+ geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
255
+
256
+ <fieldType name="text" class="solr.TextField" omitNorms="false">
257
+ <analyzer>
258
+ <tokenizer class="solr.ICUTokenizerFactory"/>
259
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
260
+ <filter class="solr.TrimFilterFactory"/>
261
+ </analyzer>
262
+ </fieldType>
263
+
264
+ <!-- A text field that only splits on whitespace for exact matching of words -->
265
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
266
+ <analyzer>
267
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
268
+ <filter class="solr.TrimFilterFactory"/>
269
+ </analyzer>
270
+ </fieldType>
271
+
272
+ <!-- single token analyzed text, for sorting. Punctuation is significant. -->
273
+ <fieldtype name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
274
+ <analyzer>
275
+ <tokenizer class="solr.KeywordTokenizerFactory" />
276
+ <filter class="solr.ICUFoldingFilterFactory"/>
277
+ <filter class="solr.TrimFilterFactory" />
278
+ </analyzer>
279
+ </fieldtype>
280
+
281
+ <!-- A text field with defaults appropriate for English -->
282
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
283
+ <analyzer>
284
+ <tokenizer class="solr.ICUTokenizerFactory"/>
285
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
286
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
287
+ <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
288
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
289
+ <!--
290
+ <filter class="solr.PorterStemFilterFactory"/>
291
+ -->
292
+ <filter class="solr.TrimFilterFactory"/>
293
+ </analyzer>
294
+ </fieldType>
295
+
296
<!-- A text field with defaults appropriate for English and NGrams.
     Edge n-grams are produced by the index analyzer only; the query analyzer
     applies the same folding and stemming without n-gramming the query terms. -->
<fieldType name="text_en_ng" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
    <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="15" side="front"/>
  </analyzer>

  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
  </analyzer>
</fieldType>
317
+
318
+ <fieldType name="pid_text" class="solr.TextField" positionIncrementGap="100">
319
+ <analyzer>
320
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
321
+ </analyzer>
322
+ </fieldType>
323
+
324
+ <!-- queries for paths match documents at that path, or in descendent paths -->
325
+ <fieldType name="descendent_path" class="solr.TextField">
326
+ <analyzer type="index">
327
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
328
+ </analyzer>
329
+ <analyzer type="query">
330
+ <tokenizer class="solr.KeywordTokenizerFactory" />
331
+ </analyzer>
332
+ </fieldType>
333
+
334
+ <!-- queries for paths match documents at that path, or in ancestor paths -->
335
+ <fieldType name="ancestor_path" class="solr.TextField">
336
+ <analyzer type="index">
337
+ <tokenizer class="solr.KeywordTokenizerFactory" />
338
+ </analyzer>
339
+ <analyzer type="query">
340
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
341
+ </analyzer>
342
+ </fieldType>
343
+
344
+ </types>
345
+
346
+ </schema>