exhibits_solr_conf 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ # Punctuation characters we want to ignore as terms (i.e., when surrounded
2
+ # by whitespace in a query, like 'fred : the puppy') in queries
3
+ # ONLY FOR SINGLE TOKEN ANALYZED FIELDS
4
+ # see https://issues.apache.org/jira/browse/SOLR-3085
5
+ # Note that hyphens, plusses, and double hyphens are not treated as terms
6
+ # per debugQuery
7
+ :
8
+ ;
9
+ &
10
+ /
11
+ =
12
+ >
13
+ <
14
+ ,
15
+ .
16
+ (
17
+ )
18
+
19
+ »
20
+ §
21
+
22
+ ·
@@ -0,0 +1,73 @@
1
+ # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
2
+ # A synonym file for Solr SynonymFilterFactory.
3
+ # Needs to be included at both index and query time
4
+ # AFTER the case folding
5
+ # BEFORE the WordDelimiterFilterFactory that removes punctuation
6
+ # e.g.
7
+ # <analyzer>
8
+ # <tokenizer class="solr.WhitespaceTokenizerFactory" />
9
+ # <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
10
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
11
+ # <filter class="solr.WordDelimiterFilterFactory" ...
12
+ # ...
13
+ #
14
+ # !!!
15
+ # !!! IMPORTANT: see also synonyms_both_anchors.txt, synonyms_left_anchor.txt and synonyms_right_anchor.txt
16
+ # !!!
17
+
18
+ # Explicit mappings match any token sequence on the LHS of "=>"
19
+ # and replace with all alternatives on the RHS. These types of mappings
20
+ # ignore the expand parameter in the schema.
21
+ # Equivalent synonyms may be separated with commas and give
22
+ # no explicit mapping. In this case the mapping behavior will
23
+ # be taken from the expand parameter in the schema.
24
+ # If expand==true, "ipod, i-pod, i pod" is equivalent to the explicit mapping:
25
+ # ipod, i-pod, i pod => ipod, i-pod, i pod
26
+ # If expand==false, "ipod, i-pod, i pod" is equivalent to the explicit mapping:
27
+ # ipod, i-pod, i pod => ipod
28
+ # set expand to true for index time and false for query time
29
+
30
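+ # See SW-845 for the term changes listed here; note that the mappings below run in the opposite direction (e.g. department => dept)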
31
+ # "Dept." will change to "Department"
32
+ # "Koran" will change to "Qur'an"
33
+ # "violoncello" will change to "cello"
34
+ # "O.T." and "N.T." will change to "Old Testament" and "New Testament"
35
+ # note that mapping TO the abbreviation improves recall but reduces precision:
36
+ # O.T. can mean Old Testament or overtime; dept could be a word in some
37
+ # language.
38
+ department => dept
39
+ qurʼan, qur'an, quran, qorʼan, qor'an, qoran => koran
40
+ violoncello, violincello => cello
41
+ # multi-token synonyms, and synonyms with punctuation, can be problematic
42
+ #old testament => o.t.
43
+ #new testament => n.t.
44
+
45
+ # The below is inspired by Jonathan Rochkind at Johns Hopkins University, 2013-04-15
46
+
47
+ # punctuation-including terms we want to protect (whitelist) and make searchable.
48
+ # We do this by mapping them to unique tokens that do not include punctuation
49
+
50
+ # computer languages
51
+ # these are explicit mappings so when WDF drops the non-letter chars, c++ is not equivalent to c
52
+ c++ => cplusplus
53
+ j#, j♯ => jsssharp
54
+ # c# and f# are music keys as well as computer languages
55
+
56
+ # musical keys
57
+ # these are explicit mappings so when WDF drops the non-letter chars, c# is not equivalent to c
58
+ # We map from number-sign (#), musical sharp (♯), and hyphenated "-sharp" forms
59
+ a#, a♯, a-sharp => a sharp
60
+ b#, b♯, b-sharp => b sharp
61
+ c#, c♯, c-sharp => c sharp
62
+ d#, d♯, d-sharp => d sharp
63
+ e#, e♯, e-sharp => e sharp
64
+ f#, f♯, f-sharp => f sharp
65
+ g#, g♯, g-sharp => g sharp
66
+ # We map from lowercase b, musical flat (♭), and hyphenated "-flat" forms
67
+ ab, a♭, a-flat => a flat
68
+ bb, b♭, b-flat => b flat
69
+ cb, c♭, c-flat => c flat
70
+ db, d♭, d-flat => d flat
71
+ eb, e♭, e-flat => e flat
72
+ fb, f♭, f-flat => f flat
73
+ gb, g♭, g-flat => g flat
@@ -0,0 +1,47 @@
1
+ # Include in analysis with both left anchor of 'aaaaaa' and right anchor of 'zzzzzz'
2
+ # for a query or field consisting solely of a token meant to be a synonym
3
+ #
4
+ # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
5
+ # A synonym file for Solr SynonymFilterFactory.
6
+ # Needs to be included at both index and query time
7
+ # AFTER the case folding
8
+ # BEFORE the WordDelimiterFilterFactory that removes punctuation
9
+ # e.g.
10
+ # <analyzer>
11
+ # <!-- put beginning and ending anchors on field value, removing trailing chars -->
12
+ # <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(.*[^\s\.\:\;\/\[\]])[\s\.\:\;\/\[\]]*$" replacement="aaaaaa$1zzzzzz"/>
13
+ # <tokenizer class="solr.WhitespaceTokenizerFactory" />
14
+ # <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
15
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
16
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_both_anchors.txt" ignoreCase="true" expand="false"/>
17
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="false"/>
18
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_right_anchor.txt" ignoreCase="true" expand="false"/>
19
+ # <filter class="solr.WordDelimiterFilterFactory" ...
20
+ # ...
21
+ #
22
+
23
+ aaaaaadepartmentzzzzzz => aaaaaadeptzzzzzz
24
+ aaaaaaqurʼanzzzzzz, aaaaaaqur'anzzzzzz, aaaaaaquranzzzzzz, aaaaaaqorʼanzzzzzz, aaaaaaqor'anzzzzzz, aaaaaaqoranzzzzzz => aaaaaakoranzzzzzz
25
+ aaaaaavioloncellozzzzzz, aaaaaaviolincellozzzzzz => aaaaaacellozzzzzz
26
+
27
+ # computer languages
28
+ aaaaaac++zzzzzz => aaaaaacpluspluszzzzzz
29
+ aaaaaaj#zzzzzz, aaaaaaj♯zzzzzz => aaaaaajsssharpzzzzzz
30
+
31
+ # musical keys
32
+ # We map from number-sign (#), musical sharp (♯), and hyphenated "-sharp" forms
33
+ aaaaaaa#zzzzzz, aaaaaaa♯zzzzzz, aaaaaaa-sharpzzzzzz => aaaaaaa sharpzzzzzz
34
+ aaaaaab#zzzzzz, aaaaaab♯zzzzzz, aaaaaab-sharpzzzzzz => aaaaaab sharpzzzzzz
35
+ aaaaaac#zzzzzz, aaaaaac♯zzzzzz, aaaaaac-sharpzzzzzz => aaaaaac sharpzzzzzz
36
+ aaaaaad#zzzzzz, aaaaaad♯zzzzzz, aaaaaad-sharpzzzzzz => aaaaaad sharpzzzzzz
37
+ aaaaaae#zzzzzz, aaaaaae♯zzzzzz, aaaaaae-sharpzzzzzz => aaaaaae sharpzzzzzz
38
+ aaaaaaf#zzzzzz, aaaaaaf♯zzzzzz, aaaaaaf-sharpzzzzzz => aaaaaaf sharpzzzzzz
39
+ aaaaaag#zzzzzz, aaaaaag♯zzzzzz, aaaaaag-sharpzzzzzz => aaaaaag sharpzzzzzz
40
+ # We map from lowercase b, musical flat (♭), and hyphenated "-flat" forms
41
+ aaaaaaabzzzzzz, aaaaaaa♭zzzzzz, aaaaaaa-flatzzzzzz => aaaaaaa flatzzzzzz
42
+ aaaaaabbzzzzzz, aaaaaab♭zzzzzz, aaaaaab-flatzzzzzz => aaaaaab flatzzzzzz
43
+ aaaaaacbzzzzzz, aaaaaac♭zzzzzz, aaaaaac-flatzzzzzz => aaaaaac flatzzzzzz
44
+ aaaaaadbzzzzzz, aaaaaad♭zzzzzz, aaaaaad-flatzzzzzz => aaaaaad flatzzzzzz
45
+ aaaaaaebzzzzzz, aaaaaae♭zzzzzz, aaaaaae-flatzzzzzz => aaaaaae flatzzzzzz
46
+ aaaaaafbzzzzzz, aaaaaaf♭zzzzzz, aaaaaaf-flatzzzzzz => aaaaaaf flatzzzzzz
47
+ aaaaaagbzzzzzz, aaaaaag♭zzzzzz, aaaaaag-flatzzzzzz => aaaaaag flatzzzzzz
@@ -0,0 +1,45 @@
1
+ # Include in analysis with left anchor of 'aaaaaa'
2
+ # for a query or field beginning with a token meant to be a synonym
3
+ #
4
+ # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
5
+ # A synonym file for Solr SynonymFilterFactory.
6
+ # Needs to be included at both index and query time
7
+ # AFTER the case folding
8
+ # BEFORE the WordDelimiterFilterFactory that removes punctuation
9
+ # e.g.
10
+ # <analyzer>
11
+ # <!-- put beginning anchor on field value, assume first non-whitespace char is unicode letter or number or symbol -->
12
+ # <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*([\p{L}\p{N}\p{S}]{1})" replacement="aaaaaa$1"/>
13
+ # <tokenizer class="solr.WhitespaceTokenizerFactory" />
14
+ # <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
15
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
16
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="false"/>
17
+ # <filter class="solr.WordDelimiterFilterFactory" ...
18
+ # ...
19
+ #
20
+
21
+ aaaaaadepartment => aaaaaadept
22
+ aaaaaaqurʼan, aaaaaaqur'an, aaaaaaquran, aaaaaaqorʼan, aaaaaaqor'an, aaaaaaqoran => aaaaaakoran
23
+ aaaaaavioloncello, aaaaaaviolincello => aaaaaacello
24
+
25
+ # computer languages
26
+ aaaaaac++ => aaaaaacplusplus
27
+ aaaaaaj#, aaaaaaj♯ => aaaaaajsssharp
28
+
29
+ # musical keys
30
+ # We map from number-sign (#), musical sharp (♯), and hyphenated "-sharp" forms
31
+ aaaaaaa#, aaaaaaa♯, aaaaaaa-sharp => aaaaaaa sharp
32
+ aaaaaab#, aaaaaab♯, aaaaaab-sharp => aaaaaab sharp
33
+ aaaaaac#, aaaaaac♯, aaaaaac-sharp => aaaaaac sharp
34
+ aaaaaad#, aaaaaad♯, aaaaaad-sharp => aaaaaad sharp
35
+ aaaaaae#, aaaaaae♯, aaaaaae-sharp => aaaaaae sharp
36
+ aaaaaaf#, aaaaaaf♯, aaaaaaf-sharp => aaaaaaf sharp
37
+ aaaaaag#, aaaaaag♯, aaaaaag-sharp => aaaaaag sharp
38
+ # We map from lowercase b, musical flat (♭), and hyphenated "-flat" forms
39
+ aaaaaaab, aaaaaaa♭, aaaaaaa-flat => aaaaaaa flat
40
+ aaaaaabb, aaaaaab♭, aaaaaab-flat => aaaaaab flat
41
+ aaaaaacb, aaaaaac♭, aaaaaac-flat => aaaaaac flat
42
+ aaaaaadb, aaaaaad♭, aaaaaad-flat => aaaaaad flat
43
+ aaaaaaeb, aaaaaae♭, aaaaaae-flat => aaaaaae flat
44
+ aaaaaafb, aaaaaaf♭, aaaaaaf-flat => aaaaaaf flat
45
+ aaaaaagb, aaaaaag♭, aaaaaag-flat => aaaaaag flat
@@ -0,0 +1,47 @@
1
+ # Include in analysis with right anchor of 'zzzzzz'
2
+ # for a query or field ending with a token meant to be a synonym
3
+ #
4
+ # http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
5
+ # A synonym file for Solr SynonymFilterFactory.
6
+ # Needs to be included at both index and query time
7
+ # AFTER the case folding
8
+ # BEFORE the WordDelimiterFilterFactory that removes punctuation
9
+ # e.g.
10
+ # <analyzer>
11
+ # <!-- put beginning and ending anchors on field value, removing trailing chars -->
12
+ # <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(.*[^\s\.\:\;\/\[\]])[\s\.\:\;\/\[\]]*$" replacement="aaaaaa$1zzzzzz"/>
13
+ # <tokenizer class="solr.WhitespaceTokenizerFactory" />
14
+ # <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed -->
15
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
16
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_both_anchors.txt" ignoreCase="true" expand="false"/>
17
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="false"/>
18
+ # <filter class="solr.SynonymFilterFactory" synonyms="synonyms_right_anchor.txt" ignoreCase="true" expand="false"/>
19
+ # <filter class="solr.WordDelimiterFilterFactory" ...
20
+ # ...
21
+ #
22
+
23
+ departmentzzzzzz => deptzzzzzz
24
+ qurʼanzzzzzz, qur'anzzzzzz, quranzzzzzz, qorʼanzzzzzz, qor'anzzzzzz, qoranzzzzzz => koranzzzzzz
25
+ violoncellozzzzzz, violincellozzzzzz => cellozzzzzz
26
+
27
+ # computer languages
28
+ c++zzzzzz => cpluspluszzzzzz
29
+ j#zzzzzz, j♯zzzzzz => jsssharpzzzzzz
30
+
31
+ # musical keys
32
+ # We map from number-sign (#), musical sharp (♯), and hyphenated "-sharp" forms
33
+ a#zzzzzz, a♯zzzzzz, a-sharpzzzzzz => a sharpzzzzzz
34
+ b#zzzzzz, b♯zzzzzz, b-sharpzzzzzz => b sharpzzzzzz
35
+ c#zzzzzz, c♯zzzzzz, c-sharpzzzzzz => c sharpzzzzzz
36
+ d#zzzzzz, d♯zzzzzz, d-sharpzzzzzz => d sharpzzzzzz
37
+ e#zzzzzz, e♯zzzzzz, e-sharpzzzzzz => e sharpzzzzzz
38
+ f#zzzzzz, f♯zzzzzz, f-sharpzzzzzz => f sharpzzzzzz
39
+ g#zzzzzz, g♯zzzzzz, g-sharpzzzzzz => g sharpzzzzzz
40
+ # We map from lowercase b, musical flat (♭), and hyphenated "-flat" forms
41
+ abzzzzzz, a♭zzzzzz, a-flatzzzzzz => a flatzzzzzz
42
+ bbzzzzzz, b♭zzzzzz, b-flatzzzzzz => b flatzzzzzz
43
+ cbzzzzzz, c♭zzzzzz, c-flatzzzzzz => c flatzzzzzz
44
+ dbzzzzzz, d♭zzzzzz, d-flatzzzzzz => d flatzzzzzz
45
+ ebzzzzzz, e♭zzzzzz, e-flatzzzzzz => e flatzzzzzz
46
+ fbzzzzzz, f♭zzzzzz, f-flatzzzzzz => f flatzzzzzz
47
+ gbzzzzzz, g♭zzzzzz, g-flatzzzzzz => g flatzzzzzz
metadata ADDED
@@ -0,0 +1,168 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: exhibits_solr_conf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Naomi Dushay
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-11-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: jettywrapper
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: hurley
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rubocop-rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: A set of Solr config files and a rake task for testing sul-dlss exhibit
112
+     and spotlight git repos (sul-exhibits-templates, spotlight-dor-resources, exhibits-requests)
113
+ email:
114
+ - ndushay@stanford.edu
115
+ executables: []
116
+ extensions: []
117
+ extra_rdoc_files: []
118
+ files:
119
+ - ".gitignore"
120
+ - ".hound.yml"
121
+ - ".rspec"
122
+ - ".rubocop.yml"
123
+ - ".rubocop_todo.yml"
124
+ - ".travis.yml"
125
+ - Gemfile
126
+ - LICENSE
127
+ - README.md
128
+ - Rakefile
129
+ - bin/console
130
+ - bin/setup
131
+ - exhibits_solr_conf.gemspec
132
+ - lib/exhibits_solr_conf.rb
133
+ - lib/exhibits_solr_conf/tasks/configure_solr.rake
134
+ - lib/exhibits_solr_conf/version.rb
135
+ - solr_conf_4_testing/_rest_managed.json
136
+ - solr_conf_4_testing/schema.xml
137
+ - solr_conf_4_testing/solrconfig.xml
138
+ - solr_conf_4_testing/stopwords_punctuation.txt
139
+ - solr_conf_4_testing/synonyms.txt
140
+ - solr_conf_4_testing/synonyms_both_anchors.txt
141
+ - solr_conf_4_testing/synonyms_left_anchor.txt
142
+ - solr_conf_4_testing/synonyms_right_anchor.txt
143
+ homepage: https://github.com/sul-dlss/exhibits_solr_conf
144
+ licenses:
145
+ - Apache-2.0
146
+ metadata: {}
147
+ post_install_message:
148
+ rdoc_options: []
149
+ require_paths:
150
+ - lib
151
+ required_ruby_version: !ruby/object:Gem::Requirement
152
+ requirements:
153
+ - - ">="
154
+ - !ruby/object:Gem::Version
155
+ version: '0'
156
+ required_rubygems_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: '0'
161
+ requirements: []
162
+ rubyforge_project:
163
+ rubygems_version: 2.4.8
164
+ signing_key:
165
+ specification_version: 4
166
+ summary: Solr config files for testing sul-dlss exhibit and spotlight git repos.
167
+ test_files: []
168
+ has_rdoc: