exhibits_solr_conf 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
+ # Punctuation characters we want to ignore as terms (i.e., when surrounded
2
+ # by whitespace, like 'fred : the puppy') in queries
3
+ # ONLY FOR SINGLE TOKEN ANALYZED FIELDS
4
+ # see https://issues.apache.org/jira/browse/SOLR-3085
5
+ # Note that hyphens, plusses, and double hyphens are not treated as terms
6
+ # per debugQuery
7
+ :
8
+ ;
9
+ &
10
+ /
11
+ =
12
+ >
13
+ <
14
+ ,
15
+ .
16
+ (
17
+ )
18
+
19
+ »
20
+ §
21
+
22
+ ·
@@ -0,0 +1,73 @@
1
+ # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
2
+ # A synonym file for Solr SynonymFilterFactory.
3
+ # Needs to be included at both index and query time
4
+ # AFTER the case folding
5
+ # BEFORE the WordDelimiterFilterFactory that removes punctuation
6
+ # e.g.
7
+ # <analyzer>
8
+ # <tokenizer class="solr.WhitespaceTokenizerFactory" />
9
+ # <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
10
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
11
+ # <filter class="solr.WordDelimiterFilterFactory" ...
12
+ # ...
13
+ #
14
+ # !!!
15
+ # !!! IMPORTANT: see also synonyms_both_anchors.txt, synonyms_left_anchor.txt and synonyms_right_anchor.txt
16
+ # !!!
17
+
18
+ # Explicit mappings match any token sequence on the LHS of "=>"
19
+ # and replace with all alternatives on the RHS. These types of mappings
20
+ # ignore the expand parameter in the schema.
21
+ # Equivalent synonyms may be separated with commas and give
22
+ # no explicit mapping. In this case the mapping behavior will
23
+ # be taken from the expand parameter in the schema.
24
+ # If expand==true, "ipod, i-pod, i pod" is equivalent to the explicit mapping:
25
+ # ipod, i-pod, i pod => ipod, i-pod, i pod
26
+ # If expand==false, "ipod, i-pod, i pod" is equivalent to the explicit mapping:
27
+ # ipod, i-pod, i pod => ipod
28
+ # set expand to true for index time and false for query time
29
+
30
+ # See SW-845
31
+ # "Dept." will change to "Department"
32
+ # "Koran" will change to "Qur'an"
33
+ # "violoncello" will change to "cello"
34
+ # "O.T." and "N.T." will change to "Old Testament" and "New Testament"
35
+ # note that mapping TO the abbreviation improves recall but reduces precision:
36
+ # O.T. can mean Old Testament or overtime; dept could be a word in some
37
+ # language.
38
+ department => dept
39
+ qurʼan, qur'an, quran, qorʼan, qor'an, qoran => koran
40
+ violoncello, violincello => cello
41
+ # multi-token synonyms, and synonyms with punctuation, can be problematic
42
+ #old testament => o.t.
43
+ #new testament => n.t.
44
+
45
+ # The below is inspired by Jonathan Rochkind at Johns Hopkins University, 2013-04-15
46
+
47
+ # terms containing punctuation that we want to protect (whitelist) and make searchable.
48
+ # We do this by mapping them to unique tokens that do not include punctuation
49
+
50
+ # computer languages
51
+ # these are explicit mappings so when WDF drops the non-letter chars, c++ is not equivalent to c
52
+ c++ => cplusplus
53
+ j#, j♯ => jsssharp
54
+ # c# and f# are music keys as well as computer languages
55
+
56
+ # musical keys
57
+ # these are explicit mappings so when WDF drops the non-letter chars, c# is not equivalent to c
58
+ # We map both from number-sign (#) and musical sharp (♯)
59
+ a#, a♯, a-sharp => a sharp
60
+ b#, b♯, b-sharp => b sharp
61
+ c#, c♯, c-sharp => c sharp
62
+ d#, d♯, d-sharp => d sharp
63
+ e#, e♯, e-sharp => e sharp
64
+ f#, f♯, f-sharp => f sharp
65
+ g#, g♯, g-sharp => g sharp
66
+ # We map both from lowercase b and musical flat (♭)
67
+ ab, a♭, a-flat => a flat
68
+ bb, b♭, b-flat => b flat
69
+ cb, c♭, c-flat => c flat
70
+ db, d♭, d-flat => d flat
71
+ eb, e♭, e-flat => e flat
72
+ fb, f♭, f-flat => f flat
73
+ gb, g♭, g-flat => g flat
@@ -0,0 +1,47 @@
1
+ # Include in analysis with both left anchor of 'aaaaaa' and right anchor of 'zzzzzz'
2
+ # for a query or field consisting solely of a token meant to be a synonym
3
+ #
4
+ # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
5
+ # A synonym file for Solr SynonymFilterFactory.
6
+ # Needs to be included at both index and query time
7
+ # AFTER the case folding
8
+ # BEFORE the WordDelimiterFilterFactory that removes punctuation
9
+ # e.g.
10
+ # <analyzer>
11
+ # <!-- put beginning and ending anchors on field value, removing trailing chars -->
12
+ # <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(.*[^\s\.\:\;\/\[\]])[\s\.\:\;\/\[\]]*$" replacement="aaaaaa$1zzzzzz"/>
13
+ # <tokenizer class="solr.WhitespaceTokenizerFactory" />
14
+ # <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
15
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
16
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_both_anchors.txt" ignoreCase="true" expand="false"/>
17
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="false"/>
18
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_right_anchor.txt" ignoreCase="true" expand="false"/>
19
+ # <filter class="solr.WordDelimiterFilterFactory" ...
20
+ # ...
21
+ #
22
+
23
+ aaaaaadepartmentzzzzzz => aaaaaadeptzzzzzz
24
+ aaaaaaqurʼanzzzzzz, aaaaaaqur'anzzzzzz, aaaaaaquranzzzzzz, aaaaaaqorʼanzzzzzz, aaaaaaqor'anzzzzzz, aaaaaaqoranzzzzzz => aaaaaakoranzzzzzz
25
+ aaaaaavioloncellozzzzzz, aaaaaaviolincellozzzzzz => aaaaaacellozzzzzz
26
+
27
+ # computer languages
28
+ aaaaaac++zzzzzz => aaaaaacpluspluszzzzzz
29
+ aaaaaaj#zzzzzz, aaaaaaj♯zzzzzz => aaaaaajsssharpzzzzzz
30
+
31
+ # musical keys
32
+ # We map both from number-sign (#) and musical sharp (♯)
33
+ aaaaaaa#zzzzzz, aaaaaaa♯zzzzzz, aaaaaaa-sharpzzzzzz => aaaaaaa sharpzzzzzz
34
+ aaaaaab#zzzzzz, aaaaaab♯zzzzzz, aaaaaab-sharpzzzzzz => aaaaaab sharpzzzzzz
35
+ aaaaaac#zzzzzz, aaaaaac♯zzzzzz, aaaaaac-sharpzzzzzz => aaaaaac sharpzzzzzz
36
+ aaaaaad#zzzzzz, aaaaaad♯zzzzzz, aaaaaad-sharpzzzzzz => aaaaaad sharpzzzzzz
37
+ aaaaaae#zzzzzz, aaaaaae♯zzzzzz, aaaaaae-sharpzzzzzz => aaaaaae sharpzzzzzz
38
+ aaaaaaf#zzzzzz, aaaaaaf♯zzzzzz, aaaaaaf-sharpzzzzzz => aaaaaaf sharpzzzzzz
39
+ aaaaaag#zzzzzz, aaaaaag♯zzzzzz, aaaaaag-sharpzzzzzz => aaaaaag sharpzzzzzz
40
+ # We map both from lowercase b and musical flat (♭)
41
+ aaaaaaabzzzzzz, aaaaaaa♭zzzzzz, aaaaaaa-flatzzzzzz => aaaaaaa flatzzzzzz
42
+ aaaaaabbzzzzzz, aaaaaab♭zzzzzz, aaaaaab-flatzzzzzz => aaaaaab flatzzzzzz
43
+ aaaaaacbzzzzzz, aaaaaac♭zzzzzz, aaaaaac-flatzzzzzz => aaaaaac flatzzzzzz
44
+ aaaaaadbzzzzzz, aaaaaad♭zzzzzz, aaaaaad-flatzzzzzz => aaaaaad flatzzzzzz
45
+ aaaaaaebzzzzzz, aaaaaae♭zzzzzz, aaaaaae-flatzzzzzz => aaaaaae flatzzzzzz
46
+ aaaaaafbzzzzzz, aaaaaaf♭zzzzzz, aaaaaaf-flatzzzzzz => aaaaaaf flatzzzzzz
47
+ aaaaaagbzzzzzz, aaaaaag♭zzzzzz, aaaaaag-flatzzzzzz => aaaaaag flatzzzzzz
@@ -0,0 +1,45 @@
1
+ # Include in analysis with left anchor of 'aaaaaa'
2
+ # for a query or field beginning with a token meant to be a synonym
3
+ #
4
+ # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
5
+ # A synonym file for Solr SynonymFilterFactory.
6
+ # Needs to be included at both index and query time
7
+ # AFTER the case folding
8
+ # BEFORE the WordDelimiterFilterFactory that removes punctuation
9
+ # e.g.
10
+ # <analyzer>
11
+ # <!-- put beginning anchor on field value, assume first non-whitespace char is unicode letter or number or symbol -->
12
+ # <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*([\p{L}\p{N}\p{S}]{1})" replacement="aaaaaa$1"/>
13
+ # <tokenizer class="solr.WhitespaceTokenizerFactory" />
14
+ # <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
15
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
16
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="false"/>
17
+ # <filter class="solr.WordDelimiterFilterFactory" ...
18
+ # ...
19
+ #
20
+
21
+ aaaaaadepartment => aaaaaadept
22
+ aaaaaaqurʼan, aaaaaaqur'an, aaaaaaquran, aaaaaaqorʼan, aaaaaaqor'an, aaaaaaqoran => aaaaaakoran
23
+ aaaaaavioloncello, aaaaaaviolincello => aaaaaacello
24
+
25
+ # computer languages
26
+ aaaaaac++ => aaaaaacplusplus
27
+ aaaaaaj#, aaaaaaj♯ => aaaaaajsssharp
28
+
29
+ # musical keys
30
+ # We map both from number-sign (#) and musical sharp (♯)
31
+ aaaaaaa#, aaaaaaa♯, aaaaaaa-sharp => aaaaaaa sharp
32
+ aaaaaab#, aaaaaab♯, aaaaaab-sharp => aaaaaab sharp
33
+ aaaaaac#, aaaaaac♯, aaaaaac-sharp => aaaaaac sharp
34
+ aaaaaad#, aaaaaad♯, aaaaaad-sharp => aaaaaad sharp
35
+ aaaaaae#, aaaaaae♯, aaaaaae-sharp => aaaaaae sharp
36
+ aaaaaaf#, aaaaaaf♯, aaaaaaf-sharp => aaaaaaf sharp
37
+ aaaaaag#, aaaaaag♯, aaaaaag-sharp => aaaaaag sharp
38
+ # We map both from lowercase b and musical flat (♭)
39
+ aaaaaaab, aaaaaaa♭, aaaaaaa-flat => aaaaaaa flat
40
+ aaaaaabb, aaaaaab♭, aaaaaab-flat => aaaaaab flat
41
+ aaaaaacb, aaaaaac♭, aaaaaac-flat => aaaaaac flat
42
+ aaaaaadb, aaaaaad♭, aaaaaad-flat => aaaaaad flat
43
+ aaaaaaeb, aaaaaae♭, aaaaaae-flat => aaaaaae flat
44
+ aaaaaafb, aaaaaaf♭, aaaaaaf-flat => aaaaaaf flat
45
+ aaaaaagb, aaaaaag♭, aaaaaag-flat => aaaaaag flat
@@ -0,0 +1,47 @@
1
+ # Include in analysis with right anchor of 'zzzzzz'
2
+ # for a query or field ending with a token meant to be a synonym
3
+ #
4
+ # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
5
+ # A synonym file for Solr SynonymFilterFactory.
6
+ # Needs to be included at both index and query time
7
+ # AFTER the case folding
8
+ # BEFORE the WordDelimiterFilterFactory that removes punctuation
9
+ # e.g.
10
+ # <analyzer>
11
+ # <!-- put beginning and ending anchors on field value, removing trailing chars -->
12
+ # <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(.*[^\s\.\:\;\/\[\]])[\s\.\:\;\/\[\]]*$" replacement="aaaaaa$1zzzzzz"/>
13
+ # <tokenizer class="solr.WhitespaceTokenizerFactory" />
14
+ # <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
15
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
16
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_both_anchors.txt" ignoreCase="true" expand="false"/>
17
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="false"/>
18
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_right_anchor.txt" ignoreCase="true" expand="false"/>
19
+ # <filter class="solr.WordDelimiterFilterFactory" ...
20
+ # ...
21
+ #
22
+
23
+ departmentzzzzzz => deptzzzzzz
24
+ qurʼanzzzzzz, qur'anzzzzzz, quranzzzzzz, qorʼanzzzzzz, qor'anzzzzzz, qoranzzzzzz => koranzzzzzz
25
+ violoncellozzzzzz, violincellozzzzzz => cellozzzzzz
26
+
27
+ # computer languages
28
+ c++zzzzzz => cpluspluszzzzzz
29
+ j#zzzzzz, j♯zzzzzz => jsssharpzzzzzz
30
+
31
+ # musical keys
32
+ # We map both from number-sign (#) and musical sharp (♯)
33
+ a#zzzzzz, a♯zzzzzz, a-sharpzzzzzz => a sharpzzzzzz
34
+ b#zzzzzz, b♯zzzzzz, b-sharpzzzzzz => b sharpzzzzzz
35
+ c#zzzzzz, c♯zzzzzz, c-sharpzzzzzz => c sharpzzzzzz
36
+ d#zzzzzz, d♯zzzzzz, d-sharpzzzzzz => d sharpzzzzzz
37
+ e#zzzzzz, e♯zzzzzz, e-sharpzzzzzz => e sharpzzzzzz
38
+ f#zzzzzz, f♯zzzzzz, f-sharpzzzzzz => f sharpzzzzzz
39
+ g#zzzzzz, g♯zzzzzz, g-sharpzzzzzz => g sharpzzzzzz
40
+ # We map both from lowercase b and musical flat (♭)
41
+ abzzzzzz, a♭zzzzzz, a-flatzzzzzz => a flatzzzzzz
42
+ bbzzzzzz, b♭zzzzzz, b-flatzzzzzz => b flatzzzzzz
43
+ cbzzzzzz, c♭zzzzzz, c-flatzzzzzz => c flatzzzzzz
44
+ dbzzzzzz, d♭zzzzzz, d-flatzzzzzz => d flatzzzzzz
45
+ ebzzzzzz, e♭zzzzzz, e-flatzzzzzz => e flatzzzzzz
46
+ fbzzzzzz, f♭zzzzzz, f-flatzzzzzz => f flatzzzzzz
47
+ gbzzzzzz, g♭zzzzzz, g-flatzzzzzz => g flatzzzzzz
metadata ADDED
@@ -0,0 +1,168 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: exhibits_solr_conf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Naomi Dushay
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: jettywrapper
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: hurley
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop-rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: A set of Solr config files and a rake task for testing sul-dlss exhibit
112
+ and spotlight git repos (sul-exhibits-templates, spotlight-dor-resources, exhibits-requests)
113
+ email:
114
+ - ndushay@stanford.edu
115
+ executables: []
116
+ extensions: []
117
+ extra_rdoc_files: []
118
+ files:
119
+ - ".gitignore"
120
+ - ".hound.yml"
121
+ - ".rspec"
122
+ - ".rubocop.yml"
123
+ - ".rubocop_todo.yml"
124
+ - ".travis.yml"
125
+ - Gemfile
126
+ - LICENSE
127
+ - README.md
128
+ - Rakefile
129
+ - bin/console
130
+ - bin/setup
131
+ - exhibits_solr_conf.gemspec
132
+ - lib/exhibits_solr_conf.rb
133
+ - lib/exhibits_solr_conf/tasks/configure_solr.rake
134
+ - lib/exhibits_solr_conf/version.rb
135
+ - solr_conf_4_testing/_rest_managed.json
136
+ - solr_conf_4_testing/schema.xml
137
+ - solr_conf_4_testing/solrconfig.xml
138
+ - solr_conf_4_testing/stopwords_punctuation.txt
139
+ - solr_conf_4_testing/synonyms.txt
140
+ - solr_conf_4_testing/synonyms_both_anchors.txt
141
+ - solr_conf_4_testing/synonyms_left_anchor.txt
142
+ - solr_conf_4_testing/synonyms_right_anchor.txt
143
+ homepage: https://github.com/sul-dlss/exhibits_solr_conf
144
+ licenses:
145
+ - Apache-2.0
146
+ metadata: {}
147
+ post_install_message:
148
+ rdoc_options: []
149
+ require_paths:
150
+ - lib
151
+ required_ruby_version: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - ">="
154
+ - !ruby/object:Gem::Version
155
+ version: '0'
156
+ required_rubygems_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: '0'
161
+ requirements: []
162
+ rubyforge_project:
163
+ rubygems_version: 2.4.8
164
+ signing_key:
165
+ specification_version: 4
166
+ summary: Solr config files for testing sul-dlss exhibit and spotlight git repos.
167
+ test_files: []
168
+ has_rdoc: