spotlight-dor-resources 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.hound.yml +2 -0
  4. data/.rubocop.yml +8 -0
  5. data/.rubocop_todo.yml +191 -0
  6. data/.travis.yml +17 -0
  7. data/Gemfile +32 -0
  8. data/LICENSE.txt +13 -0
  9. data/README.md +65 -0
  10. data/Rakefile +48 -0
  11. data/app/models/spotlight/resources/dor_resource.rb +17 -0
  12. data/app/models/spotlight/resources/harvestdor.rb +4 -0
  13. data/app/models/spotlight/resources/purl.rb +14 -0
  14. data/app/models/spotlight/resources/searchworks.rb +15 -0
  15. data/lib/spotlight/dor/indexer.rb +160 -0
  16. data/lib/spotlight/dor/resources.rb +16 -0
  17. data/lib/spotlight/dor/resources/engine.rb +13 -0
  18. data/lib/spotlight/dor/resources/version.rb +7 -0
  19. data/solr_conf/conf/schema.xml +346 -0
  20. data/solr_conf/conf/solrconfig.xml +180 -0
  21. data/spec/integration/gdor_integration_spec.rb +30 -0
  22. data/spec/integration/indexer_integration_spec.rb +28 -0
  23. data/spec/models/spotlight/resources/purl_spec.rb +115 -0
  24. data/spec/models/spotlight/resources/searchworks_spec.rb +91 -0
  25. data/spec/spec_helper.rb +60 -0
  26. data/spec/test_app_templates/catalog_controller.rb +96 -0
  27. data/spec/test_app_templates/gdor.yml +9 -0
  28. data/spec/test_app_templates/lib/generators/test_app_generator.rb +40 -0
  29. data/spec/unit/spotlight/dor/indexer_spec.rb +200 -0
  30. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_a_doc_id.yml +1201 -0
  31. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_exhibit-specific_indexing.yml +1003 -0
  32. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_spotlight_data.yml +1003 -0
  33. data/spec/vcr_cassettes/gdor_indexing_integration_test/should_have_the_gdor_data.yml +1003 -0
  34. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/no_donor_tags_ssim_field_in_solr_doc_when_note_displayLabel_Donor_tags_not_in_MODS.yml +1382 -0
  35. data/spec/vcr_cassettes/indexer_integration_tests/donor_tags/solr_doc_has_donor_tags_ssim_field_when_note_displayLabel_Donor_tags_is_in_MODS.yml +1602 -0
  36. data/spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml +6822 -0
  37. data/spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml +1390 -0
  38. data/spotlight-dor-resources.gemspec +37 -0
  39. metadata +336 -0
@@ -0,0 +1,14 @@
module Spotlight::Resources
  # Resource provider for URLs hosted on purl.stanford.edu.
  class Purl < Spotlight::Resources::DorResource
    # Scheme + host prefix of a PURL URL. Anchored with \A (not ^) so the prefix
    # must begin the string rather than any line inside it; the dots are escaped
    # so they only match literal dots.
    HOST_PATTERN = %r{\Ahttps?://purl\.stanford\.edu}

    # Same prefix followed by the first path segment (captured); the segment
    # ends at the next '#', '/' or '.'.
    DOC_ID_PATTERN = %r{\Ahttps?://purl\.stanford\.edu/([^#/.]+)}

    self.weight = -1000

    # @param res [#url] candidate resource
    # @return [Boolean] true when res.url starts with an http(s) purl.stanford.edu address
    def self.can_provide?(res)
      !!(res.url =~ HOST_PATTERN)
    end

    # @return [String, nil] first path segment of the URL, or nil when the URL
    #   carries no such segment (previously this raised NoMethodError on nil)
    def doc_id
      url[DOC_ID_PATTERN, 1]
    end
  end
end
@@ -0,0 +1,15 @@
module Spotlight::Resources
  # Resource provider for URLs hosted on a searchworks*.stanford.edu host.
  class Searchworks < Spotlight::Resources::DorResource
    # Scheme + host prefix. Anchored with \A (not ^) so the prefix must begin
    # the string; the dots are escaped so they only match literal dots.
    # "[^.]*" allows a suffix on the host label directly after "searchworks".
    HOST_PATTERN = %r{\Ahttps?://searchworks[^.]*\.stanford\.edu}

    # Host prefix, any path leading up to "view/", then the captured segment
    # which ends at the next '/', '.' or '#'.
    DOC_ID_PATTERN = %r{\Ahttps?://searchworks[^.]*\.stanford\.edu/.*view/([^/.#]+)}

    self.weight = -1000

    # @param res [#url] candidate resource
    # @return [Boolean] true when res.url starts with an http(s) searchworks*.stanford.edu address
    def self.can_provide?(res)
      !!(res.url =~ HOST_PATTERN)
    end

    # @return [String, nil] the segment following "view/" in the URL, or nil when
    #   the URL has no such segment (previously this raised NoMethodError on nil)
    def doc_id
      url[DOC_ID_PATTERN, 1]
    end
  end
end
@@ -0,0 +1,160 @@
1
+ # external gems
2
+ require 'gdor/indexer'
3
+ require 'solrizer'
4
+ # Base class to harvest from DOR via harvestdor gem
5
+ module Spotlight::Dor
6
+ class Indexer < GDor::Indexer
# Index contentMetadata: the contentMetadata type, plus one set of
# stacks.stanford.edu image URLs per image/jp2 file. The first file whose id
# ends in "jp2" also supplies the "first image" file name, width and height.
before_index do |sdb, solr_doc|
  content_type = sdb.public_xml.xpath("/publicObject/contentMetadata/@type").text
  Solrizer.insert_field(solr_doc, 'content_metadata_type', content_type, :symbol, :displayable)

  jp2_files = sdb.public_xml.xpath("/publicObject/contentMetadata").xpath('resource/file[@mimetype="image/jp2"]')
  jp2_files.each do |file_node|
    file_name = file_node.attr('id')
    file_id = file_name.gsub(".jp2", '')

    # only record "first image" details while that field is still unset
    if file_name =~ /jp2$/ && !solr_doc[Solrizer.solr_name('content_metadata_first_image_file_name', :displayable)]
      image_width = file_node.xpath('./imageData/@width').text
      image_height = file_node.xpath('./imageData/@height').text
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_file_name', file_id, :displayable)
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_width', image_width, :displayable)
      Solrizer.insert_field(solr_doc, 'content_metadata_first_image_height', image_height, :displayable)
    end

    Solrizer.insert_field(solr_doc, 'content_metadata_image_iiif_info', "https://stacks.stanford.edu/image/iiif/#{solr_doc[:id]}%2F#{file_id}/info.json", :displayable)
    Solrizer.insert_field(solr_doc, 'thumbnail_square_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_square", :displayable)
    Solrizer.insert_field(solr_doc, 'thumbnail_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_thumb", :displayable)
    Solrizer.insert_field(solr_doc, 'large_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_large", :displayable)
    Solrizer.insert_field(solr_doc, 'full_image_url', "https://stacks.stanford.edu/image/#{solr_doc[:id]}/#{file_id}_full", :displayable)
  end
end
27
+
# Adjust the author_sort value coming from stanford-mods: when the field is
# set, replace every U+FFFF character in it with U+FFFD.
before_index do |_sdb, solr_doc|
  sort_key = solr_doc[:author_sort]
  solr_doc[:author_sort] = sort_key.gsub("\uFFFF", "\uFFFD") if sort_key
end
32
+
# Register the hooks that add fields from raw MODS; registration order is kept.
# see comment with add_donor_tags about Feigenbaum specific donor tags data
%i[add_box add_donor_tags add_genre add_folder add_series mods_cartographics_indexing].each do |hook_name|
  before_index hook_name
end
41
+
# Reader for the @solr_client instance variable.
# @return [Object, nil] the stored value, or nil when it has not been assigned
def solr_client
  defined?(@solr_client) ? @solr_client : nil
end
45
+
# Build the document via the superclass implementation, then run the
# :before_index hooks against it.
# @param resource the resource handed to super and to the hooks
# @return the document returned by super, after the hooks have run on it
def solr_document(resource)
  super.tap { |doc| run_hook(:before_index, resource, doc) }
end
51
+
# Wrap a druid in a Harvestdor::Indexer::Resource tied to this indexer's
# harvestdor object.
# @param druid identifier passed through to the Resource constructor
# @return [Harvestdor::Indexer::Resource]
def resource(druid)
  Harvestdor::Indexer::Resource.new(harvestdor, druid)
end
55
+
56
+ private
57
+
# Add the box number to solr_doc as the box_ssi field (note: single valued!).
# The value is read from the text of each _location.physicalLocation node
# (top level and relatedItem, per the original note); the first node that
# yields a box number wins. solr_doc is left untouched when no node matches.
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
#
# @param sdb object exposing smods_rec._location.physicalLocation
# @param solr_doc [Hash] document being built; 'box_ssi' is set on a match
def add_box(sdb, solr_doc)
  # see spec for data from actual collections
  box_nums = sdb.smods_rec._location.physicalLocation.map do |node|
    # note that this will also find Flatbox or Flat-box
    # NOTE(review): [^,|(Folder)] is a character class, so the capture stops at
    # the first comma, pipe, parenthesis or any of the letters f/o/l/d/e/r
    # (case-insensitive), not only at the word "Folder" -- confirm this is intended.
    match_data = node.text.match(/Box ?:? ?([^,|(Folder)]+)/i)
    match_data[1].strip if match_data
  end.compact # drop locations without a box so they cannot shadow a later match

  solr_doc['box_ssi'] = box_nums.first unless box_nums.empty?
end
72
+
# The donor_tags field was added in October 2015 specifically for the Feigenbaum exhibit. It is very likely
# to go unused by other projects, but should be benign: nothing is inserted when no MODS note has the
# displayLabel 'Donor tags'. Later refactoring could include project specific fields. Peter Mangiafico
#
# @param sdb object exposing smods_rec.note
# @param solr_doc document being built
def add_donor_tags(sdb, solr_doc)
  tagged_notes = sdb.smods_rec.note.select { |note| note.displayLabel == 'Donor tags' }
  # :symbol yields a _ssim field (per the original inline note)
  insert_field(solr_doc, 'donor_tags', tagged_notes.map(&:content), :symbol)
end
80
+
# Add the folder number to solr_doc as the folder_ssi field (note: single valued!).
# The value is read from the text of each _location.physicalLocation node
# (top level and relatedItem, per the original note); the first node that
# yields a folder wins. solr_doc is left untouched when no node matches.
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
#
# @param sdb object exposing smods_rec._location.physicalLocation
# @param solr_doc [Hash] document being built; 'folder_ssi' is set on a match
def add_folder(sdb, solr_doc)
  # see spec for data from actual collections
  folder_nums = sdb.smods_rec._location.physicalLocation.map do |node|
    # folder may be text with commas, so everything after "Folder" is taken
    match_data = node.text.match(/Folder:? ?(.+)/i)
    next unless match_data

    folder = match_data[1].strip
    # Menuez collection may have folder followed by Sleeve then Frame:
    # keep only the part before "Sleeve", minus a trailing comma
    sleeve_match = folder.match(/(.*),? ?Sleeve/i)
    sleeve_match ? sleeve_match[1].strip.sub(/,$/, '') : folder
  end.compact # drop locations without a folder so they cannot shadow a later match

  solr_doc['folder_ssi'] = folder_nums.first unless folder_nums.empty?
end
103
+
# Add plain MODS <genre> element data (not the SearchWorks genre values)
# under the 'genre' field.
# @param sdb object exposing smods_rec.genre.content
# @param solr_doc document being built
def add_genre(sdb, solr_doc)
  genre_values = sdb.smods_rec.genre.content
  # :symbol yields a _ssim field (per the original inline note)
  insert_field(solr_doc, 'genre', genre_values, :symbol)
end
108
+
# Add the series/accession 'number' to solr_doc as the series_ssi field (note: single valued!).
# The value is read from the text of each _location.physicalLocation node
# (top level and relatedItem, per the original note); the first node that
# yields a series wins. solr_doc is left untouched when no node matches.
# TODO: push this up to stanford-mods gem? or should it be hierarchical series/box/folder?
#
# @param sdb object exposing smods_rec._location.physicalLocation
# @param solr_doc [Hash] document being built; 'series_ssi' is set on a match
def add_series(sdb, solr_doc)
  # see spec for data from actual collections
  series_nums = sdb.smods_rec._location.physicalLocation.map do |node|
    # feigenbaum uses 'Accession'; the value runs up to the next comma or pipe
    match_data = node.text.match(/(?:(?:Series)|(?:Accession)):? ([^,|]+)/i)
    match_data[1].strip if match_data
  end.compact # drop locations without a series so they cannot shadow a later match

  solr_doc['series_ssi'] = series_nums.first unless series_nums.empty?
end
123
+
# Index MODS subject/cartographics/coordinates.
#
# Every coordinates value is inserted under the 'coordinates' field. Values
# wrapped in parentheses are additionally treated as a bounding box of the form
# "(<lng>--<lng>/<lat>--<lat>)" and appended to solr_doc["point_bbox"] as
# "minX minY maxX maxY". Parenthesised values lacking the "/" separator or one
# half of a "--" pair are skipped instead of raising or producing a bbox string
# with blank numbers.
#
# @param sdb object exposing smods_rec.subject.cartographics.coordinates
# @param solr_doc [Hash] document being built
def mods_cartographics_indexing(sdb, solr_doc)
  coordinate_texts = Array(sdb.smods_rec.subject.cartographics.coordinates).map(&:text)
  insert_field(solr_doc, "coordinates", coordinate_texts, :stored_searchable)

  coordinate_texts.each do |text|
    next unless text =~ /^\(/ && text =~ /\)$/

    lng, lat = text.gsub(/[\(\)]/, '').split('/')
    next if lng.nil? || lat.nil?

    # longitude pair is read as min--max, latitude pair as max--min
    min_x, max_x = lng.split('--').map { |x| coord_to_decimal(x) }
    max_y, min_y = lat.split('--').map { |x| coord_to_decimal(x) }
    next if [min_x, min_y, max_x, max_y].any?(&:nil?)

    solr_doc["point_bbox"] ||= []
    solr_doc["point_bbox"] << "#{min_x} #{min_y} #{max_x} #{max_y}"
  end
end
141
+
# Convert a coordinate such as "W 123°23ʹ" to decimal degrees.
#
# The value must contain a direction letter (N, E, S or W), whole degrees
# followed by "°", and optionally whole minutes followed by a minute mark
# (ʹ, ′ or '). Minutes contribute minutes/60; W and S yield negative values.
#
# @param point [String] coordinate text
# @return [Float] decimal degrees
# @raise [ArgumentError] when point does not contain a parseable coordinate
#   (previously this surfaced as a NoMethodError on nil)
def coord_to_decimal(point)
  match = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<min>\d+)[ʹ′'])?/.match(point.to_s)
  raise ArgumentError, "unparseable coordinate: #{point.inspect}" unless match

  # a missing minutes group is nil, and nil.to_f is 0.0
  decimal = match['deg'].to_i + match['min'].to_f / 60
  %w[W S].include?(match['dir']) ? -1 * decimal : decimal
end
153
+
# Insert each of the given values into solr_doc via Solrizer.insert_field.
# values may be nil, a single value or a collection; Array() normalises it,
# so nil inserts nothing.
# @param solr_doc document being built
# @param field field name passed to Solrizer
# @param values value(s) to insert
# @param args extra arguments forwarded unchanged to Solrizer.insert_field
def insert_field(solr_doc, field, values, *args)
  Array(values).each { |value| Solrizer.insert_field(solr_doc, field, value, *args) }
end
159
+ end
160
+ end
@@ -0,0 +1,16 @@
1
+ require "harvestdor-indexer"
2
+ require "spotlight/dor/resources/version"
3
+
module Spotlight
  module Dor
    # Namespace for the DOR resources engine; also exposes a shared indexer.
    module Resources
      require "spotlight/dor/indexer"
      require "spotlight/dor/resources/engine"

      # Memoized Spotlight::Dor::Indexer, built from the application's
      # config/gdor.yml and Blacklight's solr configuration on first use.
      # @return [Spotlight::Dor::Indexer]
      def self.indexer
        @indexer ||= begin
          config_path = File.join(Rails.root, "config", "gdor.yml")
          Spotlight::Dor::Indexer.new(config_path, solr: Blacklight.solr_config)
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ require 'spotlight/engine'
2
+ require 'spotlight/dor/resources'
3
+
module Spotlight::Dor::Resources
  # Rails engine that registers the Searchworks and Purl resource providers
  # with Spotlight and sets this engine's parallel_options.
  class Engine < ::Rails::Engine
    initializer "spotlight.dor.initialize" do
      providers = Spotlight::Engine.config.resource_providers
      [Spotlight::Resources::Searchworks, Spotlight::Resources::Purl].each do |provider|
        providers << provider
      end

      Spotlight::Dor::Resources::Engine.config.parallel_options = { in_threads: 1 }
    end
  end
end
@@ -0,0 +1,7 @@
module Spotlight
  module Dor
    module Resources
      # Gem version string; frozen so the constant cannot be mutated in place.
      VERSION = "0.0.1".freeze
    end
  end
end
@@ -0,0 +1,346 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <schema name="Hydra" version="1.5">
3
+ <!-- NOTE: various comments and unused configuration possibilities have been purged
4
+ from this file. Please refer to http://wiki.apache.org/solr/SchemaXml,
5
+ as well as the default schema file included with Solr -->
6
+
7
+ <uniqueKey>id</uniqueKey>
8
+
9
+ <fields>
10
+ <field name="id" type="string" stored="true" indexed="true" multiValued="false" required="true"/>
11
+ <field name="_version_" type="long" indexed="true" stored="true"/>
12
+ <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
13
+
14
+ <field name="lat" type="tdouble" stored="true" indexed="true" multiValued="false"/>
15
+ <field name="lng" type="tdouble" stored="true" indexed="true" multiValued="false"/>
16
+ <field name="original_pid_tesim" type="pid_text" stored="true" indexed="true" multiValued="true"/>
17
+
18
+ <field name="full_title_ng" type="text_en_ng" stored="false" indexed="true" multiValued="true"/>
19
+ <field name="id_ng" type="text_en_ng" stored="false" indexed="true" multiValued="false"/>
20
+
21
+ <!-- NOTE: not all possible Solr field types are represented in the dynamic fields -->
22
+
23
+ <!-- text (_t...) -->
24
+ <!--
25
+ <dynamicField name="*_ti" type="text" stored="false" indexed="true" multiValued="false"/>
26
+ <dynamicField name="*_tim" type="text" stored="false" indexed="true" multiValued="true"/>
27
+ -->
28
+ <dynamicField name="*_ts" type="text" stored="true" indexed="false" multiValued="false"/>
29
+ <dynamicField name="*_tsm" type="text" stored="true" indexed="false" multiValued="true"/>
30
+ <dynamicField name="*_tsi" type="text" stored="true" indexed="true" multiValued="false"/>
31
+ <dynamicField name="*_tsim" type="text" stored="true" indexed="true" multiValued="true"/>
32
+ <!--
33
+ <dynamicField name="*_tiv" type="text" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
34
+ <dynamicField name="*_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
35
+ -->
36
+ <dynamicField name="*_tsiv" type="text" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
37
+ <dynamicField name="*_tsimv" type="text" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
38
+
39
+ <!-- English text (_te...) -->
40
+ <!--
41
+ <dynamicField name="*_tei" type="text_en" stored="false" indexed="true" multiValued="false"/>
42
+ <dynamicField name="*_teim" type="text_en" stored="false" indexed="true" multiValued="true"/>
43
+ -->
44
+ <dynamicField name="*_tes" type="text_en" stored="true" indexed="false" multiValued="false"/>
45
+ <dynamicField name="*_tesm" type="text_en" stored="true" indexed="false" multiValued="true"/>
46
+ <dynamicField name="*_tesi" type="text_en" stored="true" indexed="true" multiValued="false"/>
47
+ <dynamicField name="*_tesim" type="text_en" stored="true" indexed="true" multiValued="true"/>
48
+ <!--
49
+ <dynamicField name="*_teiv" type="text_en" stored="false" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
50
+ <dynamicField name="*_teimv" type="text_en" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
51
+ -->
52
+ <dynamicField name="*_tesiv" type="text_en" stored="true" indexed="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"/>
53
+ <dynamicField name="*_tesimv" type="text_en" stored="true" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
54
+
55
+ <!-- string (_s...) -->
56
+ <!--
57
+ <dynamicField name="*_si" type="string" stored="false" indexed="true" multiValued="false"/>
58
+ <dynamicField name="*_sim" type="string" stored="false" indexed="true" multiValued="true"/>
59
+ -->
60
+ <dynamicField name="*_ss" type="string" stored="true" indexed="false" multiValued="false"/>
61
+ <dynamicField name="*_ssm" type="string" stored="true" indexed="false" multiValued="true"/>
62
+ <dynamicField name="*_ssi" type="string" stored="true" indexed="true" multiValued="false"/>
63
+ <dynamicField name="*_ssim" type="string" stored="true" indexed="true" multiValued="true"/>
64
+ <!--
65
+ <dynamicField name="*_ssort" type="alphaSort" stored="false" indexed="true" multiValued="false"/>
66
+ -->
67
+
68
+ <!-- integer (_i...) -->
69
+ <!--
70
+ <dynamicField name="*_ii" type="int" stored="false" indexed="true" multiValued="false"/>
71
+ <dynamicField name="*_iim" type="int" stored="false" indexed="true" multiValued="true"/>
72
+ -->
73
+ <dynamicField name="*_is" type="int" stored="true" indexed="false" multiValued="false"/>
74
+ <dynamicField name="*_ism" type="int" stored="true" indexed="false" multiValued="true"/>
75
+ <dynamicField name="*_isi" type="int" stored="true" indexed="true" multiValued="false"/>
76
+ <dynamicField name="*_isim" type="int" stored="true" indexed="true" multiValued="true"/>
77
+
78
+ <!-- trie integer (_it...) (for faster range queries) -->
79
+ <!--
80
+ <dynamicField name="*_iti" type="tint" stored="false" indexed="true" multiValued="false"/>
81
+ <dynamicField name="*_itim" type="tint" stored="false" indexed="true" multiValued="true"/>
82
+ -->
83
+ <dynamicField name="*_its" type="tint" stored="true" indexed="false" multiValued="false"/>
84
+ <dynamicField name="*_itsm" type="tint" stored="true" indexed="false" multiValued="true"/>
85
+ <dynamicField name="*_itsi" type="tint" stored="true" indexed="true" multiValued="false"/>
86
+ <dynamicField name="*_itsim" type="tint" stored="true" indexed="true" multiValued="true"/>
87
+
88
+ <!-- date (_dt...) -->
89
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
90
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z -->
91
+ <!--
92
+ <dynamicField name="*_dti" type="date" stored="false" indexed="true" multiValued="false"/>
93
+ <dynamicField name="*_dtim" type="date" stored="false" indexed="true" multiValued="true"/>
94
+ -->
95
+ <dynamicField name="*_dts" type="date" stored="true" indexed="false" multiValued="false"/>
96
+ <dynamicField name="*_dtsm" type="date" stored="true" indexed="false" multiValued="true"/>
97
+ <dynamicField name="*_dtsi" type="date" stored="true" indexed="true" multiValued="false"/>
98
+ <dynamicField name="*_dtsim" type="date" stored="true" indexed="true" multiValued="true"/>
99
+
100
+ <!-- trie date (_dtt...) (for faster range queries) -->
101
+ <!--
102
+ <dynamicField name="*_dtti" type="tdate" stored="false" indexed="true" multiValued="false"/>
103
+ <dynamicField name="*_dttim" type="tdate" stored="false" indexed="true" multiValued="true"/>
104
+ -->
105
+ <dynamicField name="*_dtts" type="tdate" stored="true" indexed="false" multiValued="false"/>
106
+ <dynamicField name="*_dttsm" type="tdate" stored="true" indexed="false" multiValued="true"/>
107
+ <dynamicField name="*_dttsi" type="tdate" stored="true" indexed="true" multiValued="false"/>
108
+ <dynamicField name="*_dttsim" type="tdate" stored="true" indexed="true" multiValued="true"/>
109
+
110
+ <!-- long (_l...) -->
111
+ <!--
112
+ <dynamicField name="*_li" type="long" stored="false" indexed="true" multiValued="false"/>
113
+ <dynamicField name="*_lim" type="long" stored="false" indexed="true" multiValued="true"/>
114
+ -->
115
+ <dynamicField name="*_ls" type="long" stored="true" indexed="false" multiValued="false"/>
116
+ <dynamicField name="*_lsm" type="long" stored="true" indexed="false" multiValued="true"/>
117
+ <dynamicField name="*_lsi" type="long" stored="true" indexed="true" multiValued="false"/>
118
+ <dynamicField name="*_lsim" type="long" stored="true" indexed="true" multiValued="true"/>
119
+
120
+ <!-- trie long (_lt...) (for faster range queries) -->
121
+ <!--
122
+ <dynamicField name="*_lti" type="tlong" stored="false" indexed="true" multiValued="false"/>
123
+ <dynamicField name="*_ltim" type="tlong" stored="false" indexed="true" multiValued="true"/>
124
+ -->
125
+ <dynamicField name="*_lts" type="tlong" stored="true" indexed="false" multiValued="false"/>
126
+ <dynamicField name="*_ltsm" type="tlong" stored="true" indexed="false" multiValued="true"/>
127
+ <dynamicField name="*_ltsi" type="tlong" stored="true" indexed="true" multiValued="false"/>
128
+ <dynamicField name="*_ltsim" type="tlong" stored="true" indexed="true" multiValued="true"/>
129
+
130
+ <!-- double (_db...) -->
131
+ <!--
132
+ <dynamicField name="*_dbi" type="double" stored="false" indexed="true" multiValued="false"/>
133
+ <dynamicField name="*_dbim" type="double" stored="false" indexed="true" multiValued="true"/>
134
+ -->
135
+ <dynamicField name="*_dbs" type="double" stored="true" indexed="false" multiValued="false"/>
136
+ <dynamicField name="*_dbsm" type="double" stored="true" indexed="false" multiValued="true"/>
137
+ <dynamicField name="*_dbsi" type="double" stored="true" indexed="true" multiValued="false"/>
138
+ <dynamicField name="*_dbsim" type="double" stored="true" indexed="true" multiValued="true"/>
139
+
140
+ <!-- trie double (_dbt...) (for faster range queries) -->
141
+ <!--
142
+ <dynamicField name="*_dbti" type="tdouble" stored="false" indexed="true" multiValued="false"/>
143
+ <dynamicField name="*_dbtim" type="tdouble" stored="false" indexed="true" multiValued="true"/>
144
+ -->
145
+ <dynamicField name="*_dbts" type="tdouble" stored="true" indexed="false" multiValued="false"/>
146
+ <dynamicField name="*_dbtsm" type="tdouble" stored="true" indexed="false" multiValued="true"/>
147
+ <dynamicField name="*_dbtsi" type="tdouble" stored="true" indexed="true" multiValued="false"/>
148
+ <dynamicField name="*_dbtsim" type="tdouble" stored="true" indexed="true" multiValued="true"/>
149
+
150
+ <!-- float (_f...) -->
151
+ <!--
152
+ <dynamicField name="*_fi" type="float" stored="false" indexed="true" multiValued="false"/>
153
+ <dynamicField name="*_fim" type="float" stored="false" indexed="true" multiValued="true"/>
154
+ -->
155
+ <dynamicField name="*_fs" type="float" stored="true" indexed="false" multiValued="false"/>
156
+ <dynamicField name="*_fsm" type="float" stored="true" indexed="false" multiValued="true"/>
157
+ <dynamicField name="*_fsi" type="float" stored="true" indexed="true" multiValued="false"/>
158
+ <dynamicField name="*_fsim" type="float" stored="true" indexed="true" multiValued="true"/>
159
+
160
+ <!-- trie float (_ft...) (for faster range queries) -->
161
+ <!--
162
+ <dynamicField name="*_fti" type="tfloat" stored="false" indexed="true" multiValued="false"/>
163
+ <dynamicField name="*_ftim" type="tfloat" stored="false" indexed="true" multiValued="true"/>
164
+ -->
165
+ <dynamicField name="*_fts" type="tfloat" stored="true" indexed="false" multiValued="false"/>
166
+ <dynamicField name="*_ftsm" type="tfloat" stored="true" indexed="false" multiValued="true"/>
167
+ <dynamicField name="*_ftsi" type="tfloat" stored="true" indexed="true" multiValued="false"/>
168
+ <dynamicField name="*_ftsim" type="tfloat" stored="true" indexed="true" multiValued="true"/>
169
+
170
+ <!-- boolean (_b...) -->
171
+ <!--
172
+ <dynamicField name="*_bi" type="boolean" stored="false" indexed="true" multiValued="false"/>
173
+ -->
174
+ <dynamicField name="*_bs" type="boolean" stored="true" indexed="false" multiValued="false"/>
175
+ <dynamicField name="*_bsi" type="boolean" stored="true" indexed="true" multiValued="false"/>
176
+
177
+ <!-- Type used to index the lat and lon components for the "location" FieldType -->
178
+ <!--
179
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
180
+ -->
181
+
182
+ <!-- location (_ll...) -->
183
+ <!--
184
+ <dynamicField name="*_lli" type="location" stored="false" indexed="true" multiValued="false"/>
185
+ <dynamicField name="*_llim" type="location" stored="false" indexed="true" multiValued="true"/>
186
+ -->
187
+ <dynamicField name="*_lls" type="location" stored="true" indexed="false" multiValued="false"/>
188
+ <dynamicField name="*_llsm" type="location" stored="true" indexed="false" multiValued="true"/>
189
+ <dynamicField name="*_llsi" type="location" stored="true" indexed="true" multiValued="false"/>
190
+ <dynamicField name="*_llsim" type="location" stored="true" indexed="true" multiValued="true"/>
191
+
192
+ <!-- you must define copyField source and dest fields explicitly or schemaBrowser doesn't work -->
193
+ <field name="all_text_timv" type="text" stored="false" indexed="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
194
+
195
+
196
+ </fields>
197
+
198
+
199
+ <!-- Above, multiple source fields are copied to the [text] field.
200
+ Another way to map multiple source fields to the same
201
+ destination field is to use the dynamic field syntax.
202
+ copyField also supports a maxChars to copy setting. -->
203
+
204
+ <copyField source="id" dest="id_ng" maxChars="3000"/>
205
+ <copyField source="full_title_tesim" dest="full_title_ng" maxChars="3000"/>
206
+ <copyField source="*_tesim" dest="all_text_timv" maxChars="3000"/>
207
+
208
+ <types>
209
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
210
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
211
+ <fieldType name="rand" class="solr.RandomSortField" omitNorms="true"/>
212
+
213
+ <!-- Default numeric field types. -->
214
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
215
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
216
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
217
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>
218
+
219
+ <!-- trie numeric field types for faster range queries -->
220
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
221
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
222
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
223
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
224
+
225
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z
226
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
227
+ -->
228
+ <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
229
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
230
+ <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
231
+
232
+
233
+ <!-- This point type indexes the coordinates as separate fields (subFields)
234
+ If subFieldType is defined, it references a type, and a dynamic field
235
+ definition is created matching *___<typename>. Alternately, if
236
+ subFieldSuffix is defined, that is used to create the subFields.
237
+ Example: if subFieldType="double", then the coordinates would be
238
+ indexed in fields myloc_0___double,myloc_1___double.
239
+ Example: if subFieldSuffix="_d" then the coordinates would be indexed
240
+ in fields myloc_0_d,myloc_1_d
241
+ The subFields are an implementation detail of the fieldType, and end
242
+ users normally should not need to know about them.
243
+ -->
244
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
245
+
246
+ <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
247
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
248
+
249
+ <!-- An alternative geospatial field type new to Solr 4. It supports multiValued and polygon shapes.
250
+ For more information about this and other Spatial fields new to Solr 4, see:
251
+ http://wiki.apache.org/solr/SolrAdaptersForLuceneSpatial4
252
+ -->
253
+ <fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
254
+ geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
255
+
256
+ <fieldType name="text" class="solr.TextField" omitNorms="false">
257
+ <analyzer>
258
+ <tokenizer class="solr.ICUTokenizerFactory"/>
259
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
260
+ <filter class="solr.TrimFilterFactory"/>
261
+ </analyzer>
262
+ </fieldType>
263
+
264
+ <!-- A text field that only splits on whitespace for exact matching of words -->
265
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
266
+ <analyzer>
267
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
268
+ <filter class="solr.TrimFilterFactory"/>
269
+ </analyzer>
270
+ </fieldType>
271
+
272
+ <!-- single token analyzed text, for sorting. Punctuation is significant. -->
273
+ <fieldtype name="alphaSort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
274
+ <analyzer>
275
+ <tokenizer class="solr.KeywordTokenizerFactory" />
276
+ <filter class="solr.ICUFoldingFilterFactory"/>
277
+ <filter class="solr.TrimFilterFactory" />
278
+ </analyzer>
279
+ </fieldtype>
280
+
281
+ <!-- A text field with defaults appropriate for English -->
282
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
283
+ <analyzer>
284
+ <tokenizer class="solr.ICUTokenizerFactory"/>
285
+ <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
286
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
287
+ <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
288
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
289
+ <!--
290
+ <filter class="solr.PorterStemFilterFactory"/>
291
+ -->
292
+ <filter class="solr.TrimFilterFactory"/>
293
+ </analyzer>
294
+ </fieldType>
295
+
<!-- A text field with defaults appropriate for English and NGrams.
     Front edge n-grams (3 to 15 characters) are generated at index time only;
     the query analyzer runs the same chain without the n-gram filter. -->
<fieldType name="text_en_ng" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
    <filter class="solr.EdgeNGramFilterFactory" minGramSize="3" maxGramSize="15" side="front"/>
  </analyzer>

  <analyzer type="query">
    <tokenizer class="solr.ICUTokenizerFactory"/>
    <filter class="solr.ICUFoldingFilterFactory"/>  <!-- NFKC, case folding, diacritics removed -->
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- EnglishMinimalStemFilterFactory is less aggressive than PorterStemFilterFactory: -->
    <filter class="solr.EnglishMinimalStemFilterFactory"/>
    <filter class="solr.TrimFilterFactory"/>
  </analyzer>
</fieldType>
317
+
318
+ <fieldType name="pid_text" class="solr.TextField" positionIncrementGap="100">
319
+ <analyzer>
320
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
321
+ </analyzer>
322
+ </fieldType>
323
+
324
+ <!-- queries for paths match documents at that path, or in descendent paths -->
325
+ <fieldType name="descendent_path" class="solr.TextField">
326
+ <analyzer type="index">
327
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
328
+ </analyzer>
329
+ <analyzer type="query">
330
+ <tokenizer class="solr.KeywordTokenizerFactory" />
331
+ </analyzer>
332
+ </fieldType>
333
+
334
+ <!-- queries for paths match documents at that path, or in ancestor paths -->
335
+ <fieldType name="ancestor_path" class="solr.TextField">
336
+ <analyzer type="index">
337
+ <tokenizer class="solr.KeywordTokenizerFactory" />
338
+ </analyzer>
339
+ <analyzer type="query">
340
+ <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
341
+ </analyzer>
342
+ </fieldType>
343
+
344
+ </types>
345
+
346
+ </schema>